Example #1
def play_as_mcts(batch_data, networks, config, connection):
    """
    Play a batch of games as MCTS vs. MCTS.
    """
    roots = create_roots(batch_data)

    # Get network evaluation from main process.
    data = pack_data_for_eval(batch_data, networks, roots)
    connection.send(("evaluate", data))

    policies, values = connection.recv()
    log(f"Root policies:\n{policies}")
    log(f"OG root values: {values}")
    values = expand_nodes(batch_data, roots, policies, values)
    prepare_actions(batch_data, roots)

    for _ in range(config.MCTS_ITERATIONS):
        selected_nodes = select_nodes(batch_data, roots)

        data = pack_data_for_eval(batch_data, networks, selected_nodes)
        connection.send(("evaluate", data))
        policies, values = connection.recv()
        values = expand_nodes(batch_data, selected_nodes, policies, values)
        backprop_nodes(batch_data, selected_nodes, values)
    return roots
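
For context, here is a minimal sketch of the main-process side of the ("evaluate", data) exchange used above. Everything except the message shapes is an assumption, not taken from the source:

def serve_evaluations(worker_conn, network):
    # Hypothetical server loop: receive packed states, reply with
    # (policies, values) as play_as_mcts expects. The `network` object
    # and its evaluate method are illustrative stand-ins.
    while True:
        command, payload = worker_conn.recv()
        if command == "evaluate":
            policies, values = network.evaluate(payload)
            worker_conn.send((policies, values))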
Example #2
    def __init__(self, game, playouts=None):
        super().__init__(game)
        if playouts is not None:
            self.ITERATIONS = playouts
        else:
            self.ITERATIONS = Config.MCTS_ITERATIONS

        log("MCTS is using {} playouts.".format(self.ITERATIONS))
Example #3
    def execute_action(self, state):
        """Block until the human player selects an action via the GUI."""

        log("Waiting for player input...")
        self.state = None
        self.gui.add_action_listener(self)
        while self.state is None and self.gui.active:
            sleep(0.1)
        return self.state
Example #4
    def execute_action(self, state):
        """Pick a uniformly random action and return the resulting state."""
        actions = self.game.actions(state)
        # random.choice avoids the rare edge case where uniform(0, n)
        # returns exactly n and the index goes out of range.
        chosen = choice(actions)  # assumes `choice` is imported from random

        #self.game.store_random_statistics({a: uniform() for a in actions})

        log("Random action: {}".format(chosen))
        return self.game.result(state, chosen)
Example #5
    def __init__(self, game):
        GameAI.__init__(self, game)
        self.tpt = dict()  # Transposition table.

        game_name = type(self.game).__name__
        if game_name == "Latrunculi":
            self.MAX_DEPTH = 12 - self.game.size if self.game.size < 8 else 5
        elif game_name == "Connect_Four":
            self.MAX_DEPTH = 13 - self.game.size
        elif game_name == "Othello":
            self.MAX_DEPTH = 15 - self.game.size
        log(f"Minimax is using a max search depth of {self.MAX_DEPTH}")
Example #6
def softmax_sample(child_nodes, visit_counts, temperature=2.5):
    """
    Perform softmax sampling on a set of nodes
    based on a probability distribution of their
    visit counts.
    """
    sum_visits = sum(visit_counts)
    # Scale the normalized visit fractions by the temperature, then take a
    # softmax. A temperature above 1 sharpens the distribution toward the
    # most-visited nodes.
    scaled = [(v / sum_visits) * temperature for v in visit_counts]
    exps = np.exp(scaled)
    probs = exps / np.sum(exps)
    log(f"Probabilities of softmax: {probs}")

    return np.random.choice(child_nodes, p=probs)
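
A minimal usage sketch, assuming numpy is imported as np and that log is a print-style helper; the node stand-ins and visit counts below are illustrative, not from the source:

import numpy as np

log = print  # stand-in for the project's log helper

children = ["node_a", "node_b", "node_c"]  # stand-ins for Node objects
visits = [10, 30, 60]
picked = softmax_sample(children, visits)  # most often "node_c", but sampled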
Example #7
    def show_move(self, state, action):
        player_color = self.player_color(state.player)
        if action:
            source, dest = action.source, action.dest
            log("{} moved from {} to {}".format(player_color, source, dest))
            self.draw_status_text("{} moved from {} to {}".format(
                player_color, source, dest))
        else:
            log("{} has no moves, pass made.".format(player_color))
            self.draw_status_text("{} passed".format(player_color))

        result = self.game.result(state, action)
        self.update(result)
Example #8
    def __init__(self, game, playouts=Config.MCTS_ITERATIONS):
        super().__init__(game)
        if playouts is not None:
            self.ITERATIONS = playouts
            self.MAX_MOVES = 5000
        elif self.game.size > 3:
            # Size-indexed lookup tables (board sizes 4 and up).
            playout_options = [800, 200, 35, 20, 10, 5, 5]
            max_moves = [400, 1200, 1600, 2400, 5000, 5000, 5000]
            self.ITERATIONS = playout_options[self.game.size - 4]
            self.MAX_MOVES = max_moves[self.game.size - 4]
        else:
            # Fallback so both attributes are always defined.
            self.ITERATIONS = Config.MCTS_ITERATIONS
            self.MAX_MOVES = 5000

        log("MCTS is using {} playouts and {} max moves.".format(
            self.ITERATIONS, self.MAX_MOVES))
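
The two parallel lists above form a size-indexed table; the same numbers restated as a single mapping, as a readability sketch (the name is illustrative):

# Board size -> (playouts, max moves), same values as the lists above.
PLAYOUTS_AND_MOVES = {
    4: (800, 400),
    5: (200, 1200),
    6: (35, 1600),
    7: (20, 2400),
    8: (10, 5000),
    9: (5, 5000),
    10: (5, 5000),
}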
Example #9
    def getorigin(self, eventorigin):
        x = eventorigin.x
        y = eventorigin.y

        coords = self.field_clicked(x, y, self.state.board, self.left_space,
                                    self.top_space, self.board_field_size)
        # print("Coords: {}".format(coords))
        # First click: select one of the current player's pieces,
        # provided it has at least one legal move.
        if self.is_currentPlayer_piece(
                self.state.player, self.state.board[coords]
        ) and self.mouseclick_move_list == [] and self.has_legal_move(coords):
            self.mouseclick_move_list.append(coords)
            self.draw_status_text(
                "Selected source coords: ({})".format(coords))
        # Games with a "single" action type need only a destination square.
        elif self.game.action_type == "single":
            if self.is_legal_move(None, coords):
                self.make_move(self.state, Action(None, coords))
                self.mouseclick_move_list.clear()
        else:
            if self.mouseclick_move_list == [] and self.is_currentPlayer_piece(
                    self.state.player,
                    int(self.state.board[coords[0], coords[1]] * -0.5)):
                self.make_move(self.state, Action(coords, coords))
            # Second click: deselect, switch selection, or complete the move.
            elif len(self.mouseclick_move_list) == 1:
                if self.mouseclick_move_list[0] == coords:
                    self.mouseclick_move_list.pop()
                elif self.is_currentPlayer_piece(self.state.player,
                                                 self.state.board[coords]):
                    self.mouseclick_move_list.pop()
                    self.mouseclick_move_list.append(coords)
                else:
                    if self.is_legal_move(self.mouseclick_move_list[0],
                                          coords):
                        self.draw_status_text(
                            "Selected destination coords: ({})".format(coords))
                        self.mouseclick_move_list.append(coords)

                        self.make_move(
                            self.state,
                            Action(self.mouseclick_move_list[0],
                                   self.mouseclick_move_list[1]))
                        self.mouseclick_move_list.clear()

            elif len(self.mouseclick_move_list) > 2:
                self.mouseclick_move_list.pop()

        log("mouseclick_move_list\n{}".format(self.mouseclick_move_list))

        self.update(self.state)
Example #10
    def execute_action(self, state):
        """Find the best action with minimax and return the resulting state."""
        self.player = state.player
        actions = self.game.actions(state)
        if actions == [None]:
            return self.game.result(state, None)
        best_action = None
        highest_value = -float("inf")

        self.time_started = time()
        # Traverse possible actions, using minimax to find the best one.
        # The -10000/10000 arguments are the initial alpha/beta bounds.
        for action in actions:
            result = self.game.result(state, action)
            value = self.minimax(result, self.MAX_DEPTH, False, -10000, 10000)
            if value > highest_value:
                highest_value = value
                best_action = action

        log("Minimax action: {} worth: {}".format(best_action, highest_value))
        return self.game.result(state, best_action)
Example #11
    def execute_action(self, node):
        """Pick the best child of the given root node and return its state."""
        best_node = self.choose_action(node)
        self.chosen_node = best_node

        log(f"Root: {node}")
        for n in node.children.values():
            log(n.pretty_desc())

        log("MCTS action: {}, q value: {}.".format(best_node.action, best_node.q_value))
        self.game.store_search_statistics(node)
        return best_node.state
Example #12
    def execute_action(self, state):
        """Run MCTS from the given state and return the best resulting state."""
        log("MCTS is calculating the best move...")

        root_node = Node(state, None)

        # Perform iterations of selection, expansion, simulation, and
        # backpropagation. After the iterations are done, the child of the
        # root node with the highest visit count is chosen as the best action.
        for _ in range(self.ITERATIONS):
            node = self.select(root_node)
            if node.visits > 0 and not self.game.terminal_test(node.state):
                # Expand tree from available actions. Select first expanded node as
                # new current and simulate an action from this node's possible actions.
                actions = self.game.actions(node.state)
                self.expand(node, actions)
                node = node.children[
                    actions[0]]  # Select first child of expanded Node.

            # Perform rollout: simulate to the end of the game and return the outcome.
            value = self.rollout(root_node.state, node)
            self.back_propagate(
                node, -value
                if node.state.player == root_node.state.player else value)

            node = root_node

        for node in root_node.children.values():
            log(node.pretty_desc())

        best_node = max(root_node.children.values(), key=lambda n: n.visits)
        root_node = None  # Drop the tree so it can be garbage collected.

        log("MCTS action: {}, likelihood of win: {}%".format(
            best_node.action, int((best_node.mean_value * 50) + 50)))

        return best_node.state
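
The logged win likelihood maps the best child's mean value q, which lies in [-1, 1], linearly onto a percentage: likelihood = 50 * q + 50, so q = -1 gives 0%, q = 0 gives 50%, and q = 1 gives 100%.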
Example #13
def play_games(games,
               w_players,
               b_players,
               config,
               network_steps=None,
               gui=None,
               connection=None):
    """
    Play a number of games to the end, with capabilities for playing as any
    type of agent and any type of game.

    Parameters:
        games         - List of game objects to be played out; the state of each game
                        (result and state history) is updated during the process.
        w_players     - List of agents controlling the white pieces.
        b_players     - List of agents controlling the black pieces.
        config        - Config object with a variety of parameters to be used during the game.
        network_steps - Dictionary mapping each game to the generation of neural
                        network it should target. Only relevant if MCTS is used.
        gui           - GUI object used to visualize the games; only available if batch play
                        is not active, meaning only one game is played.
        connection    - Pipe object connected to the main process. Used for requesting
                        network evaluations, among other things.
    # List of lists. Each containing a game to be played,
    # the current state for that game, the agent playing as player1,
    # and the agent playing as player 2.
    batch_data = [[
        games[i], games[i].start_state(), w_players[i], b_players[i]
    ] for i in range(len(games))]
    total_games = len(games)
    counters = [0 for _ in games]  # Counts the number of moves made in each game.

    if gui is not None:
        sleep(1)
        # Update GUI, to clear board, if several games are played sequentially.
        gui.update(batch_data[0][1])

    roots = None  # Set on each turn where the current player is an MCTS agent.
    while batch_data:
        player = batch_data[0][2] if batch_data[0][0].player(
            batch_data[0][1]) else batch_data[0][3]
        time_turn = time()
        if is_mcts(player):
            # Run MCTS simulations. Get resulting root nodes.
            roots = play_as_mcts(batch_data, network_steps, config, connection)

        finished_games_indexes = []
        for (i, (game, state, player_1, player_2)) in enumerate(batch_data):
            player = player_1 if game.player(state) else player_2
            # execute_action receives a root node if the player is MCTS, else a state.
            state = player.execute_action(
                roots[i] if is_mcts(player) else state)
            batch_data[i][1] = state

            if gui is not None:
                # Pause briefly after an AI move so it is visible in the GUI.
                if type(player_1).__name__ != "Human" and not state.player:
                    sleep(config.GUI_AI_SLEEP)
                elif type(player_2).__name__ != "Human" and state.player:
                    sleep(config.GUI_AI_SLEEP)
                gui.update(state)

            log(state)

            counters[i] = counters[i] + 1
            if game.terminal_test(
                    state) or counters[i] > config.LATRUNCULI_MAX_MOVES:
                finished_games_indexes.append(i)
                util = game.utility(state, True)
                game.terminal_value = util
                winner = "White" if util == 1 else "Black" if util == -1 else "Draw"
                log(f"Game over! Winner: {winner}")
            else:
                # Append state to game history, unless the state is terminal.
                game.history.append(state)

        turn_took = "{0:.3f}".format((time() - time_turn))
        num_active = len(batch_data)
        num_moves = len(batch_data[0][0].history)
        name_1, name_2 = type(batch_data[0][2]).__name__, type(
            batch_data[0][3]).__name__
        elems_removed = 0
        # Remove finished games; offset each index by the number of
        # elements already popped.
        for i in finished_games_indexes:
            batch_data.pop(i - elems_removed)
            elems_removed += 1

        num_active -= elems_removed
        if connection:
            # Send logging information to main process if playing as MCTS.
            status = (f"Moves: {num_moves}. Active games: " +
                      f"{num_active}/{total_games}. Turn took {turn_took} s")
            if name_1 != "MCTS" or name_2 != "MCTS":
                status += " - Eval vs. {}".format(name_1 if name_2 ==
                                                  "MCTS" else name_2)
            elif network_steps is not None:
                status += " - Eval vs Macro Networks"
            connection.send(("log", [status, getpid()]))
Example #14
    def notify(self, observable, *args, **kwargs):
        log("Got {}, {} {}".format(args, kwargs, observable))
        self.state = observable.actions(self.state)
        self.update(self.state)