Пример #1
0
    def play_with_agents(self, agt1, agt2):
        """Play ``self.G`` games between two agents and print agt1's win count.

        A fresh state manager, MCTS searcher and search tree are created for
        every game. Before each move the searcher's ``anet`` is set to the
        network of the side to move: ``agt1`` plays as player 1 and ``agt2``
        as player 2, alternating after every executed move.

        Args:
            agt1: Network used for player 1; its ``name`` is printed.
            agt2: Network used for player 2; its ``name`` is printed.

        Returns:
            None. A progress bar is printed while playing, followed by the
            win tally and, if at least one game was played, the board held by
            the last game's state manager.
        """
        player1_wins = 0
        # Stays None when self.G == 0 so the final board print can be skipped
        # instead of failing on an unbound name.
        sm = None

        for i in range(self.G):
            print("[{}>{}]".format("-" * i, "." * (self.G - i - 1)), end="\r")
            sm = StateManager(5)
            agent = MCTS(exploration_rate=1, anet=agt1)
            game = sm.create_game()
            tree = Tree(game, chanceOfRandom=0.0)

            # Every game starts with agt1 to move, so the "winner == 1" tally
            # below always refers to agt1.
            # NOTE(review): assumes player 1 moves first in StateManager -- confirm.
            player_turn = 1
            state = tree.root
            while not sm.is_finished():
                # Let the network of the side to move drive the search.
                agent.anet = agt1 if player_turn == 1 else agt2
                best_child = agent.uct_search(tree, state, num_search_games)

                game.execute_move(best_child.move)
                state = best_child
                # Hand the move to the other agent (1 <-> 2).
                player_turn = 3 - player_turn

            if sm.get_winner() == 1:
                player1_wins += 1

        print("{} won {}/{} against {}.".format(agt1.name, player1_wins,
                                                self.G, agt2.name))
        if sm is not None:
            print(np.reshape(sm.game.board, (boardsize, boardsize)))
Пример #2
0
import numpy as np
import collections


def train_anet(anet, RBUF):
    """Train ``anet`` on a random minibatch drawn from ``RBUF``.

    The minibatch holds at most 32 cases, sampled without replacement; when
    the buffer contains fewer than 32 cases, all of them are used.

    Args:
        anet: Object exposing ``train(minibatch)``.
        RBUF: Sequence of training cases to sample from.
    """
    sample_count = len(RBUF)
    if sample_count > 32:
        # Cap the minibatch size at 32 cases.
        sample_count = 32
    anet.train(random.sample(RBUF, sample_count))


""" Initializations """
# Network object, constructed for the configured board size (board_size is
# defined outside this excerpt).
anet = ANET(size=board_size)
# MCTS agent that uses anet; its exploration rate is fixed at 1.
agent = MCTS(exploration_rate=1, anet=anet)
# State manager, the game it creates, and the search tree built from that game.
sm = StateManager()
game = sm.create_game()
tree = Tree(game)
# NOTE(review): presumably collects per-game win results; nothing in this
# excerpt appends to it -- confirm against the rest of the script.
win_stats = []
# Bounded buffer of training cases: a deque with maxlen discards its oldest
# entry when a new one is appended at capacity.
RBUF = collections.deque(maxlen=rbuf_max_size)

for i in range(offset, num_of_games + 1):
    print("Episode: {}/{}".format(i, num_of_games))
    state = tree.root

    while (not sm.is_game_over()):
        best_child, training_case = agent.uct_search(tree, state,
                                                     num_search_games)
        RBUF.append(training_case)
        sm.move(best_child.move)
        state = best_child
        if verbose and i == num_of_games: