Пример #1
0
def playMatches(player1,
                player2,
                EPISODES,
                logger,
                turns_until_tau0,
                memory=None,
                goes_first=0):
    """Play ``EPISODES`` games between two agents and tally the outcomes.

    Args:
        player1: First agent. Must expose ``name``, a writable ``mcts``
            attribute and ``act(state, flag)`` returning
            ``(action, pi, MCTS_value, NN_value)``.
        player2: Second agent, same interface as ``player1``.
        EPISODES: Number of games to play.
        logger: Logger that receives the per-move and per-game trace.
        turns_until_tau0: While the turn counter is below this number the
            agents are called as ``act(state, 1)``; from this turn on they
            are called as ``act(state, 0)``.
        memory: Optional memory object. When given, every move is committed
            with ``commit_stmemory`` and, once a game ends, every entry of
            ``memory.stmemory`` gets its ``'value'`` set before
            ``commit_ltmemory`` is called.
        goes_first: ``0`` draws player1's seat at random (1 or -1) for each
            game. Any other value is used as-is: ``1`` seats player1 on
            side 1, anything else seats player2 there.

    Returns:
        tuple: ``(scores, memory, points, sp_scores)``.
            ``scores`` counts wins per player name plus ``"drawn"``;
            ``points`` maps each player name to the list of score entries
            taken from ``state.score`` at the end of every game;
            ``sp_scores`` counts wins of seat 1 (``'sp'``), wins of seat -1
            (``'nsp'``) and draws.
    """
    # Function-scope import: executed once per call instead of once per move.
    import datetime

    env = Game()
    scores = {player1.name: 0, "drawn": 0, player2.name: 0}
    sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
    points = {player1.name: [], player2.name: []}

    for e in range(EPISODES):

        logger.info('====================')
        logger.info('EPISODE %d OF %d', e + 1, EPISODES)
        logger.info('====================')

        # Progress indicator on stdout, all episodes on one line.
        print(str(e + 1) + ' ', end='')

        state = env.reset()

        done = 0
        turn = 0
        # Drop any search tree left over from the previous game.
        player1.mcts = None
        player2.mcts = None

        if goes_first == 0:
            # randint(0, 1) * 2 - 1 maps {0, 1} onto {-1, 1}.
            player1Starts = random.randint(0, 1) * 2 - 1
        else:
            player1Starts = goes_first

        if player1Starts == 1:
            seat_x, seat_o = player1, player2
        else:
            seat_x, seat_o = player2, player1

        # Seats are keyed by the values state.playerTurn takes (1 and -1).
        players = {
            1: {
                "agent": seat_x,
                "name": seat_x.name
            },
            -1: {
                "agent": seat_o,
                "name": seat_o.name
            }
        }
        logger.info(seat_x.name + ' plays as X')
        if player1Starts != 1:
            # NOTE(review): the separator is logged only when player1 does not
            # take seat 1 - confirm whether it should be unconditional.
            logger.info('--------------')

        env.gameState.render(logger)
        env.gameState.render_print(logger)

        while done == 0:
            turn = turn + 1

            # Ask the agent to move; the flag switches from 1 to 0 once
            # turns_until_tau0 is reached.
            act_flag = 1 if turn < turns_until_tau0 else 0
            current_agent = players[state.playerTurn]['agent']
            action, pi, MCTS_value, NN_value = current_agent.act(
                state, act_flag)

            if memory is not None:
                # Commit the move to short-term memory.
                memory.commit_stmemory(env.identities, state, pi)

            logger.info('action: %d', action)
            # Log pi one board row at a time.
            n_cols = env.grid_shape[1]
            for r in range(env.grid_shape[0]):
                logger.info([
                    '----' if x == 0 else '{0:.2f}'.format(np.round(x, 2))
                    for x in pi[n_cols * r:n_cols * (r + 1)]
                ])
            piece = state.pieces[str(state.playerTurn)]
            logger.info('MCTS perceived value for %s: %f', piece,
                        np.round(MCTS_value, 2))
            logger.info('NN perceived value for %s: %f', piece,
                        np.round(NN_value, 2))
            logger.info('====================')

            # Apply the action. `value` is expressed from the point of view
            # of the NEW state.playerTurn, i.e. -1 if the previous player
            # just played a winning move.
            state, value, done, _ = env.step(action)

            env.gameState.render(logger)
            env.gameState.render_print(logger)

            if done != 1:
                continue

            # ---- the game is over: persist and tally the result ----
            now = datetime.datetime.now()
            gameName = env.name

            if memory is not None:
                # Assign the final value to every stored move, signed from
                # the perspective of the player who made it.
                for move in memory.stmemory:
                    if move['playerTurn'] == state.playerTurn:
                        move['value'] = value
                    else:
                        move['value'] = -value

                memory.commit_ltmemory()

            if value == 1:
                winner = players[state.playerTurn]['name']
                logger.info('%s WINS!', winner)
                print('WINS!', winner)
                if gameName in player2.name:
                    if winner == player1.name:
                        Provider.addCompet(player1.name, player2.name,
                                           gameName, now, 1)
                    if winner == player2.name:
                        Provider.addCompet(player1.name, player2.name,
                                           gameName, now, 2)
                else:
                    if winner == player1.name:
                        Provider.addCompetWithUser(player1.name, player2.name,
                                                   gameName, now, 1)
                    if winner == player2.name:
                        Provider.addCompetWithUser(player1.name, player2.name,
                                                   gameName, now, 2)
                scores[winner] += 1

                if state.playerTurn == 1:
                    sp_scores['sp'] += 1
                else:
                    sp_scores['nsp'] += 1

            elif value == -1:
                # The player who just moved (the opposite seat) has won.
                winner = players[-state.playerTurn]['name']
                logger.info('%s WINS!', winner)
                print(' WINS!', winner)
                if gameName in player2.name:
                    # NOTE(review): here addCompet receives 2 when player1
                    # wins and 1 when player2 wins, the opposite of every
                    # other call in this function - confirm this is intended.
                    if winner == player1.name:
                        Provider.addCompet(player1.name, player2.name,
                                           gameName, now, 2)
                    if winner == player2.name:
                        Provider.addCompet(player1.name, player2.name,
                                           gameName, now, 1)
                else:
                    if winner == player1.name:
                        Provider.addCompetWithUser(player1.name, player2.name,
                                                   gameName, now, 1)
                    if winner == player2.name:
                        Provider.addCompetWithUser(player1.name, player2.name,
                                                   gameName, now, 2)
                scores[winner] += 1

                if state.playerTurn == 1:
                    sp_scores['nsp'] += 1
                else:
                    sp_scores['sp'] += 1

            else:
                logger.info('DRAW...')
                print('DRAW...')
                Provider.addCompet(player1.name, player2.name, gameName, now,
                                   0)
                scores['drawn'] += 1
                sp_scores['drawn'] += 1

            # state.score[0] is credited to the seat whose turn it now is,
            # state.score[1] to the other seat.
            pts = state.score
            points[players[state.playerTurn]['name']].append(pts[0])
            points[players[-state.playerTurn]['name']].append(pts[1])

    return (scores, memory, points, sp_scores)
Пример #2
0
def train(players, size, in_a_row, agents, episodes):
    """Play ``episodes`` games with ``agents``, fitting them after every move.

    Args:
        players: Number of players; used to size the per-player counters.
        size: Board size, indexed as ``size[0]`` and ``size[1]``.
        in_a_row: Passed through to ``Game`` and used in the dump file name.
        agents: Agents exposing ``action``, ``fit``, ``decay`` and ``save``.
        episodes: Number of games to play.

    Statistics are printed every 10,000 episodes, the final board every
    20,000 episodes (when the game ended in a win or a draw), and every
    agent is saved every 200,000 episodes.
    """
    env = Game(players, size, in_a_row)

    win_counts = [0] * players
    loss_counts = [0] * players
    draw_count = 0

    for episode in range(1, episodes + 1):
        finished = [False] * players
        outcomes = [None] * players
        state = env.reset().state

        # Keep cycling through the agents until the game is over for all.
        while not min(finished):
            for idx, agent in enumerate(agents):
                if finished[idx]:
                    continue
                action = agent.action(state)
                new_map, reward, finished[idx], outcomes[idx] = env.action(
                    action)
                next_state = new_map.state
                agent.fit(state, next_state, action, reward)
                state = next_state

        for agent in agents:
            agent.decay(episode)

        is_draw = Message.DRAWMESSAGE in outcomes
        if is_draw:
            draw_count += 1
        else:
            # Count wins and losses per player.
            for idx, outcome in enumerate(outcomes):
                if outcome == Message.WINMESSAGE:
                    win_counts[idx] += 1
                elif outcome == Message.LOSEMESSAGE:
                    loss_counts[idx] += 1

        if episode % 10_000 == 0:
            # Show the running statistics.
            print(f"Игра №{episode}")
            for idx in range(players):
                print(
                    f"\tИгрок {idx}. Побед: {win_counts[idx]}. Поражений: {loss_counts[idx]}"
                )
            print(f"\tНичьих: {draw_count}")

        if episode % 20_000 == 0 and (Message.WINMESSAGE in outcomes
                                      or is_draw):
            # Show the board as it stands at the end of the game.
            if is_draw:
                print(Message.DRAWMESSAGE[1])
            else:
                print(f"Игрок №{outcomes.index(Message.WINMESSAGE)} выиграл")
            print(env.game_map)

        if episode % 200_000 == 0:
            # Save every agent, overwriting the previous "-last" dump.
            for idx, agent in enumerate(agents):
                agent.save(
                    f"dumps/Player{idx}-{players}-{size[0]}x{size[1]}-{in_a_row}-last.pickle"
                )
            print("Q-таблицы сохранены")
Пример #3
0
class TicTacToeTest(unittest.TestCase):
    """Unit tests for the tic-tac-toe ``Game`` and ``ComputerPlayer``."""

    EMPTY_BOARD = [["-", "-", "-"], ["-", "-", "-"], ["-", "-", "-"]]

    def setUp(self):
        """Create a fresh game for every test."""
        self.game = Game("player 1", "player 2")

    @staticmethod
    def _best_move(player, board, depth=5):
        """Run ``player.minimax`` as the maximizing side with a -100/100 window."""
        return player.minimax(board=board,
                              depth=depth,
                              maximizing=True,
                              letter=player.letter,
                              alpha=-100,
                              beta=100)

    def test_init(self):
        """A new game starts empty, in progress, at move 0, with player 1 to move."""
        self.assertEqual(self.game.board, self.EMPTY_BOARD,
                         'Game board does not initialize to empty board')
        self.assertEqual(self.game.status, IN_PROGRESS,
                         'Game status does not initialize to in progress')
        self.assertEqual(self.game.move_count, 0,
                         'Game move does not initialize to 0')
        self.assertEqual(self.game.cur_player, self.game.players[0],
                         'Current player does not initialize to player 1')

    def test_update_board(self):
        """Updating the board at a (row, col) pair places an X there."""
        self.game.update_board(0, 0)
        self.assertEqual(self.game.board,
                         [["X", "-", "-"], ["-", "-", "-"], ["-", "-", "-"]],
                         'Board updates incorrectly')

    def test_check_status_won(self):
        """``check_status`` reports wins, a draw and an in-progress game."""
        # Winning configuration: three X's in a row.
        self.game.board = [["X", "X", "X"], ["-", "-", "-"], ["-", "-", "-"]]
        self.assertEqual(
            self.game.check_status(0, 0), WON,
            'Board doesnt recognize winning configuration across a row')

        # Winning configuration: three O's in a column.
        self.game.board = [["-", "O", "-"], ["-", "O", "-"], ["-", "O", "-"]]
        self.assertEqual(
            self.game.check_status(1, 1), WON,
            'Board doesnt recognize winning configuration across a column')

        # Winning configuration: three X's on the top-left diagonal.
        self.game.board = [["X", "-", "-"], ["-", "X", "-"], ["-", "-", "X"]]
        self.assertEqual(
            self.game.check_status(2, 2), WON,
            'Board doesnt recognize winning configuration across diagonal '
            'from top-left to bottom-right')

        # Winning configuration: three O's on the top-right diagonal.
        self.game.board = [["-", "-", "O"], ["-", "O", "-"], ["O", "-", "-"]]
        self.assertEqual(
            self.game.check_status(0, 2), WON,
            'Board doesnt recognize winning configuration across diagonal '
            'from top-right to bottom-left')

        # Tied configuration: the board is full and nobody won.
        self.game.board = [["X", "X", "O"], ["O", "X", "X"], ["X", "O", "O"]]
        self.game.move_count = 9
        self.assertEqual(self.game.check_status(1, 1), DRAW,
                         'Board does not recognize draw configuration')

        # Nobody won and at least one square is still empty.
        self.game.board = [["X", "X", "O"], ["O", "X", "X"], ["X", "O", "-"]]
        self.game.move_count = 8
        self.assertEqual(
            self.game.check_status(1, 1), IN_PROGRESS,
            'Board does not recognize in progress configuration')

    def test_alternate_player(self):
        """``alternate_player`` switches between the two players in both directions."""
        # Player 1 -> player 2.
        self.game.cur_player = self.game.players[0]
        player = self.game.alternate_player()
        self.assertEqual(player, self.game.players[1],
                         'Alternate from player 1 to player 2 not working')

        # Player 2 -> player 1.
        self.game.cur_player = self.game.players[1]
        player = self.game.alternate_player()
        self.assertEqual(player, self.game.players[0],
                         'Alternate from player 2 to player 1 not working')

    def test_reset(self):
        """``reset`` restores every game attribute to its initial value."""
        # First put the game into a state that looks like a finished game.
        # NOTE(review): 2 is a raw status value; which named status constant
        # it corresponds to is not visible here.
        self.game.status = 2
        self.game.board = [["X", "O", "-"], ["O", "X", "O"], ["-", "X", "X"]]
        self.game.move_count = 7
        self.game.cur_player = self.game.players[1]

        # Then reset and compare against the expected initial values.
        self.game.reset("name 1", "name 2")
        self.assertEqual(self.game.board, self.EMPTY_BOARD,
                         'Game board does not reset to empty board')
        self.assertEqual(self.game.status, IN_PROGRESS,
                         'Game status does not reset to in progress')
        self.assertEqual(self.game.move_count, 0,
                         'Game move does not reset to 0')
        self.assertEqual(self.game.cur_player, self.game.players[0],
                         'Current player does not reset to player 1')

    def test_get_players(self):
        """``get_players`` builds a computer and a human player with correct fields."""
        players = self.game.get_players("cpu", "human")
        self.assertEqual(
            players[0].is_human, False, "Creates a HumanPlayer when "
            "A ComputerPlayer should have been created")
        self.assertEqual(
            players[1].is_human, True, "Creates a ComputerPlayer when "
            "A HumanPlayer should have been created")
        self.assertEqual(players[0].id, 0, "Creates the wrong id for a player")
        self.assertEqual(players[1].letter, "O",
                         "Creates the wrong letter for a player")
        self.assertEqual(players[0].name, "cpu",
                         "Creates wrong name for a player")

    def test_alternate_letters(self):
        """``alternate_letters`` maps X to O and O to X."""
        letter = ComputerPlayer.alternate_letters("X")
        self.assertEqual(letter, "O", "X did not alternate to O")

        letter = ComputerPlayer.alternate_letters("O")
        self.assertEqual(letter, "X", "O did not alternate to X")

    def test_evaluate_board(self):
        """``evaluate_board`` classifies win, loss, tie and in-progress boards for X."""
        player = ComputerPlayer(0, "cpu", "X")

        # Win state.
        win_board = [["X", "O", "-"], ["O", "X", "O"], ["-", "X", "X"]]
        value = player.evaluate_board(win_board, 2)
        self.assertEqual(value, WINNER,
                         "Didnt recognize a won board configuration")

        # Win state with a full board.
        win_board = [["X", "O", "X"], ["O", "X", "O"], ["O", "X", "X"]]
        value = player.evaluate_board(win_board, 0)
        self.assertEqual(value, WINNER,
                         "Didnt recognize a won board configuration")

        # Loss state.
        loss_board = [["-", "X", "O"], ["-", "O", "X"], ["O", "X", "-"]]
        value = player.evaluate_board(loss_board, 3)
        self.assertEqual(value, LOSER,
                         "Didnt recognize a lost board configuration")

        # Loss state with a full board.
        loss_board = [["X", "X", "O"], ["O", "O", "X"], ["O", "X", "X"]]
        value = player.evaluate_board(loss_board, 0)
        self.assertEqual(value, LOSER,
                         "Didnt recognize a lost board configuration")

        # Tie state.
        tie_board = [["X", "X", "O"], ["O", "O", "X"], ["X", "O", "X"]]
        value = player.evaluate_board(tie_board, 0)
        self.assertEqual(value, TIED,
                         "Didnt recognize a tied board configuration")

        # In-progress state.
        in_progress_board = [["X", "X", "O"], ["O", "X", "X"], ["X", "O", "-"]]
        value = player.evaluate_board(in_progress_board, 1)
        self.assertEqual(value, IN_PROGRESS,
                         "Didnt recognize an in progress board configuration")

    def test_minimax(self):
        """``minimax`` picks winning, blocking and non-losing moves."""
        # Chooses the move that wins immediately.
        player = ComputerPlayer(0, "cpu", "X")
        board = [["X", "-", "O"], ["X", "-", "O"], ["-", "-", "-"]]
        best_move = self._best_move(player, board)
        self.assertEqual(best_move, [2, 0, WINNER],
                         "Doesn't chose move that will win the game")

        # Chooses the move that both prevents a loss and wins.
        player = ComputerPlayer(0, "cpu", "X")
        board = [["-", "-", "X"], ["-", "O", "-"], ["X", "-", "O"]]
        best_move = self._best_move(player, board)
        row, col = best_move[0], best_move[1]
        self.assertEqual([row, col], [0, 0],
                         "Doesn't chose move that prevent a loss and win")

        # Chooses the move that prevents a loss and leads to a tie.
        player = ComputerPlayer(0, "cpu", "X")
        board = [["-", "-", "-"], ["-", "O", "X"], ["-", "X", "O"]]
        best_move = self._best_move(player, board)
        row, col = best_move[0], best_move[1]
        self.assertEqual([row, col], [0, 0],
                         "Doesn't chose move that prevent a loss")

        # Looks a few moves ahead so the opponent cannot force a victory.
        player = ComputerPlayer(1, "cpu", "O")
        board = [["-", "-", "X"], ["-", "O", "-"], ["X", "-", "-"]]
        best_move = self._best_move(player, board)
        row, col = best_move[0], best_move[1]
        self.assertNotEqual(
            [row, col], [0, 0],
            "Chose move that allows opponent to force a victory")
        self.assertNotEqual(
            [row, col], [2, 2],
            "Chose move that allows opponent to force a victory")
Пример #4
0
"""


def ask_to_play_again():
    """
    Ask the user whether they would like to play again.

    The prompt is repeated until the answer is "yes" or "no". Surrounding
    whitespace and letter case are ignored, so "Yes" and " no " are accepted.

    Returns:
        bool: True if the user answered yes, False if the user answered no.
    """
    # A loop, not recursion: a long run of invalid answers must not be able
    # to exhaust the interpreter's recursion limit.
    while True:
        response = input("\nWould you like to play again? Enter yes or no: ")
        answer = response.strip().lower()

        if answer == "yes":
            return True
        if answer == "no":
            return False

        print("Response not recognized. You must enter yes or no.")


if __name__ == "__main__":
    # Script entry point: play games back to back until the user declines.
    print("Welcome to Tic Tac Toe")
    game = Game()
    playing = True

    while True:
        # Every pass resets the game and plays it through once.
        game.reset()
        game.play_game()
        playing = ask_to_play_again()
        if not playing:
            break

    print("Exiting Tic Tac Toe. Thanks for playing!")