def playMatches(player1, player2, EPISODES, logger, turns_until_tau0, memory=None, goes_first=0):
    """Play ``EPISODES`` games between two agents and tally the outcomes.

    Args:
        player1, player2: Agents exposing ``name``, ``mcts`` and
            ``act(state, tau)``; ``act`` must return
            ``(action, pi, MCTS_value, NN_value)``.
        EPISODES: Number of games to play.
        logger: Logger that receives the per-move and per-game trace.
        turns_until_tau0: Agents are called with ``tau=1`` while the turn
            counter is below this value and with ``tau=0`` afterwards.
        memory: Optional replay memory. When given, every move is committed
            to its short-term memory and, once a game ends, the moves are
            labelled with the final value and flushed to long-term memory.
        goes_first: ``0`` picks the starting side at random for every game;
            any other value is used directly as ``player1Starts`` (``1``
            makes ``player1`` the side with turn id ``1``, anything else
            gives that side to ``player2``).

    Returns:
        Tuple ``(scores, memory, points, sp_scores)``:
        ``scores`` counts wins per player name plus ``"drawn"``;
        ``sp_scores`` counts wins of the side with turn id ``1`` (``'sp'``),
        of the other side (``'nsp'``) and draws (``"drawn"``);
        ``points`` collects, per player name, the ``state.score`` entry
        recorded at the end of each game.
    """
    # Function-scope import (the file's own style); it used to be re-executed
    # on every single turn inside the game loop.
    import datetime

    env = Game()
    scores = {player1.name: 0, "drawn": 0, player2.name: 0}
    sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
    points = {player1.name: [], player2.name: []}

    for e in range(EPISODES):
        logger.info('====================')
        logger.info('EPISODE %d OF %d', e + 1, EPISODES)
        logger.info('====================')

        print(str(e + 1) + ' ', end='')

        state = env.reset()
        done = 0
        turn = 0
        # Each game starts with a fresh search tree for both agents.
        player1.mcts = None
        player2.mcts = None

        if goes_first == 0:
            # Maps randint's {0, 1} onto {-1, 1}.
            player1Starts = random.randint(0, 1) * 2 - 1
        else:
            player1Starts = goes_first

        # The agent stored under key 1 is the one logged as playing X.
        if player1Starts == 1:
            as_x, as_other = player1, player2
        else:
            as_x, as_other = player2, player1
        players = {
            1: {"agent": as_x, "name": as_x.name},
            -1: {"agent": as_other, "name": as_other.name},
        }
        logger.info(as_x.name + ' plays as X')
        logger.info('--------------')

        env.gameState.render(logger)
        env.gameState.render_print(logger)

        while done == 0:
            turn = turn + 1

            # Run the MCTS algo and return an action.
            tau = 1 if turn < turns_until_tau0 else 0
            action, pi, MCTS_value, NN_value = players[
                state.playerTurn]['agent'].act(state, tau)

            if memory is not None:
                # Commit the move to memory.
                memory.commit_stmemory(env.identities, state, pi)

            logger.info('action: %d', action)
            # Log the policy vector one board row at a time.
            row_len = env.grid_shape[1]
            for r in range(env.grid_shape[0]):
                logger.info([
                    '----' if x == 0 else '{0:.2f}'.format(np.round(x, 2))
                    for x in pi[row_len * r:(row_len * r + row_len)]
                ])
            logger.info('MCTS perceived value for %s: %f',
                        state.pieces[str(state.playerTurn)],
                        np.round(MCTS_value, 2))
            logger.info('NN perceived value for %s: %f',
                        state.pieces[str(state.playerTurn)],
                        np.round(NN_value, 2))
            logger.info('====================')

            # Do the action. ``value`` is the value of the new state from the
            # POV of the new playerTurn, i.e. -1 if the previous player played
            # a winning move.
            state, value, done, _ = env.step(action)

            env.gameState.render(logger)
            env.gameState.render_print(logger)

            if done != 1:
                continue

            # The timestamp and game name are only needed for the result
            # record, so they are read once per finished game.
            now = datetime.datetime.now()
            gameName = env.name

            if memory is not None:
                # The game is finished: assign the final value to every
                # stored move, signed from the point of view of its mover.
                for move in memory.stmemory:
                    if move['playerTurn'] == state.playerTurn:
                        move['value'] = value
                    else:
                        move['value'] = -value
                memory.commit_ltmemory()

            if value == 1:
                winner_turn = state.playerTurn
                banner = 'WINS!'
                compet_codes = (1, 2)
            elif value == -1:
                winner_turn = -state.playerTurn
                banner = ' WINS!'
                # NOTE(review): these codes are the reverse of the ones used
                # in the ``value == 1`` case and of the addCompetWithUser
                # codes below. Kept exactly as found; confirm against
                # Provider.addCompet whether this is intended.
                compet_codes = (2, 1)
            else:
                winner_turn = None

            if winner_turn is None:
                logger.info('DRAW...')
                print('DRAW...')
                Provider.addCompet(player1.name, player2.name, gameName, now, 0)
                scores['drawn'] = scores['drawn'] + 1
                sp_scores['drawn'] = sp_scores['drawn'] + 1
            else:
                winner_name = players[winner_turn]['name']
                logger.info('%s WINS!', winner_name)
                print(banner, winner_name)

                if player2.name.find(gameName) > -1:
                    record, codes = Provider.addCompet, compet_codes
                else:
                    record, codes = Provider.addCompetWithUser, (1, 2)
                # Two independent checks (not elif), as in the original.
                if winner_name == player1.name:
                    record(player1.name, player2.name, gameName, now, codes[0])
                if winner_name == player2.name:
                    record(player1.name, player2.name, gameName, now, codes[1])

                scores[winner_name] = scores[winner_name] + 1
                side = 'sp' if winner_turn == 1 else 'nsp'
                sp_scores[side] = sp_scores[side] + 1

            pts = state.score
            points[players[state.playerTurn]['name']].append(pts[0])
            points[players[-state.playerTurn]['name']].append(pts[1])

    return (scores, memory, points, sp_scores)
def train(players, size, in_a_row, agents, episodes):
    """
    Play several games with ``agents``, letting them learn as they go.

    A ``Game(players, size, in_a_row)`` environment is created once and reset
    for every episode. Within an episode the agents act in turn until the
    environment has reported the game as finished for each of them; every
    move is passed to the acting agent's ``fit``. After an episode each agent
    gets a ``decay(episode)`` call and the win / loss / draw counters are
    updated from the last message received per player.

    Statistics are printed every 10,000 episodes, the final board every
    20,000 episodes (when the game ended in a win or a draw) and the agents
    are saved every 200,000 episodes.
    """
    env = Game(players, size, in_a_row)
    wins = [0] * players
    loses = [0] * players
    draws = 0

    for episode in range(1, episodes + 1):
        done = [False] * players
        msgs = [None] * players
        state = env.reset().state

        # Go through all players until the game has ended for every one of them.
        while not min(done):
            for idx, agent in enumerate(agents):
                if done[idx]:
                    continue
                action = agent.action(state)
                new_map, reward, done[idx], msgs[idx] = env.action(action)
                next_state = new_map.state
                agent.fit(state, next_state, action, reward)
                state = next_state

        # The loop above never breaks, so this always runs once per episode.
        for agent in agents:
            agent.decay(episode)

        drawn = Message.DRAWMESSAGE in msgs
        if drawn:
            draws += 1
        else:
            # Count wins and losses.
            for idx, msg in enumerate(msgs):
                if msg == Message.WINMESSAGE:
                    wins[idx] += 1
                elif msg == Message.LOSEMESSAGE:
                    loses[idx] += 1

        if episode % 10_000 == 0:
            # Show the statistics.
            print(f"Игра №{episode}")
            for j in range(players):
                print(
                    f"\tИгрок {j}. Побед: {wins[j]}. Поражений: {loses[j]}"
                )
            print(f"\tНичьих: {draws}")

        if episode % 20_000 == 0 and (Message.WINMESSAGE in msgs or drawn):
            # Show the board at the end of the game.
            if drawn:
                print(Message.DRAWMESSAGE[1])
            else:
                print(f"Игрок №{msgs.index(Message.WINMESSAGE)} выиграл")
            print(env.game_map)

        if episode and episode % 200_000 == 0:
            # Save the Q-tables. Each save reuses the same "-last" file name
            # per player.
            for j, agent in enumerate(agents):
                agent.save(
                    f"dumps/Player{j}-{players}-{size[0]}x{size[1]}-{in_a_row}-last.pickle"
                )
            print("Q-таблицы сохранены")
class TicTacToeTest(unittest.TestCase):
    """Unit tests for the tic-tac-toe ``Game`` and ``ComputerPlayer`` classes."""

    def setUp(self):
        # Every test starts from a freshly constructed game.
        self.game = Game("player 1", "player 2")

    @staticmethod
    def _best_move(player, board, depth=5):
        # Run minimax for ``player`` as the maximizing side with the
        # alpha/beta window used by every scenario in test_minimax.
        return player.minimax(board=board,
                              depth=depth,
                              maximizing=True,
                              letter=player.letter,
                              alpha=-100,
                              beta=100)

    def test_init(self):
        self.assertEqual(self.game.board,
                         [["-", "-", "-"], ["-", "-", "-"], ["-", "-", "-"]],
                         'Game board does not initialize to empty board')
        self.assertEqual(self.game.status, IN_PROGRESS,
                         'Game status does not initialize to in progress')
        self.assertEqual(self.game.move_count, 0,
                         'Game move does not initialize to 0')
        self.assertEqual(self.game.cur_player, self.game.players[0],
                         'Current player does not '
                         'initialize to player 1')

    def test_update_board(self):
        # Test updating the board at a specified row, col pair
        self.game.update_board(0, 0)
        self.assertEqual(self.game.board,
                         [["X", "-", "-"], ["-", "-", "-"], ["-", "-", "-"]],
                         'Board updates incorrectly')

    def test_check_status_won(self):
        # Check if recognizes a winning configuration consisting of 3 X's in a row
        self.game.board = [["X", "X", "X"], ["-", "-", "-"], ["-", "-", "-"]]
        self.assertEqual(
            self.game.check_status(0, 0), WON,
            'Board doesnt recognize winning configuration across a row')

        # Check if recognizes a winning configuration of 3 O's in a column
        self.game.board = [["-", "O", "-"], ["-", "O", "-"], ["-", "O", "-"]]
        self.assertEqual(
            self.game.check_status(1, 1), WON,
            'Board doesnt recognize winning configuration across a column')

        # Check if recognizes a winning configuration of 3 X's across one diagonal
        self.game.board = [["X", "-", "-"], ["-", "X", "-"], ["-", "-", "X"]]
        self.assertEqual(
            self.game.check_status(2, 2), WON,
            'Board doesnt recognize winning configuration across diagonal from top-left'
            ' to bottom-right')

        # Check if recognizes a winning configuration of 3 O's across the other diagonal
        self.game.board = [["-", "-", "O"], ["-", "O", "-"], ["O", "-", "-"]]
        self.assertEqual(
            self.game.check_status(0, 2), WON,
            'Board doesnt recognize winning configuration across diagonal from top-right'
            ' to bottom-left')

        # Check if recognizes a tied configuration, where the board is full and nobody won
        self.game.board = [["X", "X", "O"], ["O", "X", "X"], ["X", "O", "O"]]
        self.game.move_count = 9
        # The two message fragments used to be joined without a space
        # ("drawconfiguration").
        self.assertEqual(self.game.check_status(1, 1), DRAW,
                         'Board does not recognize draw '
                         'configuration')

        # Check if recognizes nobody won and the game is still in progress
        # because there is at least one empty space
        self.game.board = [["X", "X", "O"], ["O", "X", "X"], ["X", "O", "-"]]
        self.game.move_count = 8
        self.assertEqual(
            self.game.check_status(1, 1), IN_PROGRESS,
            'Board does not recognize'
            ' in progress configuration')

    def test_alternate_player(self):
        # Test if successfully alternates cur_player from player 1 to player 2
        self.game.cur_player = self.game.players[0]
        player = self.game.alternate_player()
        self.assertEqual(player, self.game.players[1],
                         'Alternate from player 1'
                         ' to player 2 not working')

        # Test if successfully alternates cur_player from player 2 to player 1
        self.game.cur_player = self.game.players[1]
        player = self.game.alternate_player()
        self.assertEqual(player, self.game.players[0],
                         'Alternate from player 2'
                         ' to player 1 not working')

    def test_reset(self):
        # Test resetting all board attributes after a game has been completed.
        # First update the game to reflect a completed game.
        # NOTE(review): 2 is presumably one of the finished-game status
        # constants -- confirm and use the named constant instead.
        self.game.status = 2
        self.game.board = [["X", "O", "-"], ["O", "X", "O"], ["-", "X", "X"]]
        self.game.move_count = 7
        self.game.cur_player = self.game.players[1]

        # Then reset the board and compare it to the desired values
        self.game.reset("name 1", "name 2")
        self.assertEqual(self.game.board,
                         [["-", "-", "-"], ["-", "-", "-"], ["-", "-", "-"]],
                         'Game board does not reset to empty board')
        self.assertEqual(self.game.status, IN_PROGRESS,
                         'Game status does not reset to in progress')
        self.assertEqual(self.game.move_count, 0,
                         'Game move does not reset to 0')
        self.assertEqual(self.game.cur_player, self.game.players[0],
                         'Current player does not '
                         'reset to player 1')

    def test_get_players(self):
        # Test that creates HumanPlayers and ComputerPlayers when appropriate
        # and that they initialize correctly
        players = self.game.get_players("cpu", "human")
        # Both messages below used to be joined without a space ("whenA ...").
        self.assertEqual(
            players[0].is_human, False, "Creates a HumanPlayer when "
            "A ComputerPlayer should have been created")
        self.assertEqual(
            players[1].is_human, True, "Creates a ComputerPlayer when "
            "A HumanPlayer should have been created")
        self.assertEqual(players[0].id, 0, "Creates the wrong id for a player")
        self.assertEqual(players[1].letter, "O",
                         "Creates the wrong letter for a player")
        self.assertEqual(players[0].name, "cpu",
                         "Creates wrong name for a player")

    def test_alternate_letters(self):
        # Test that alternate from X to O correctly
        letter = ComputerPlayer.alternate_letters("X")
        self.assertEqual(letter, "O", "X did not alternate to O")

        # Test that alternate from O to X correctly
        letter = ComputerPlayer.alternate_letters("O")
        self.assertEqual(letter, "X", "O did not alternate to X")

    def test_evaluate_board(self):
        player = ComputerPlayer(0, "cpu", "X")

        # Check a win state
        win_board = [["X", "O", "-"], ["O", "X", "O"], ["-", "X", "X"]]
        value = player.evaluate_board(win_board, 2)
        self.assertEqual(value, WINNER,
                         "Didnt recognize a won board configuration")

        # Check a win state with a full board
        win_board = [["X", "O", "X"], ["O", "X", "O"], ["O", "X", "X"]]
        value = player.evaluate_board(win_board, 0)
        self.assertEqual(value, WINNER,
                         "Didnt recognize a won board configuration")

        # Check a loss state
        loss_board = [["-", "X", "O"], ["-", "O", "X"], ["O", "X", "-"]]
        value = player.evaluate_board(loss_board, 3)
        self.assertEqual(value, LOSER,
                         "Didnt recognize a lost board configuration")

        # Check a loss state with full board
        loss_board = [["X", "X", "O"], ["O", "O", "X"], ["O", "X", "X"]]
        value = player.evaluate_board(loss_board, 0)
        self.assertEqual(value, LOSER,
                         "Didnt recognize a lost board configuration")

        # Check a tie state
        tie_board = [["X", "X", "O"], ["O", "O", "X"], ["X", "O", "X"]]
        value = player.evaluate_board(tie_board, 0)
        self.assertEqual(value, TIED,
                         "Didnt recognize a tied board configuration")

        # Check an in progress state
        in_progress_board = [["X", "X", "O"], ["O", "X", "X"],
                             ["X", "O", "-"]]
        value = player.evaluate_board(in_progress_board, 1)
        self.assertEqual(value, IN_PROGRESS,
                         "Didnt recognize an in progress board configuration")

    def test_minimax(self):
        # Check if chooses move to win
        player = ComputerPlayer(0, "cpu", "X")
        board = [["X", "-", "O"], ["X", "-", "O"], ["-", "-", "-"]]
        best_move = self._best_move(player, board)
        self.assertEqual(best_move, [2, 0, WINNER],
                         "Doesn't chose move that will win the game")

        # Checks if chooses move to prevent loss and win
        player = ComputerPlayer(0, "cpu", "X")
        board = [["-", "-", "X"], ["-", "O", "-"], ["X", "-", "O"]]
        best_move = self._best_move(player, board)
        row, col = best_move[0], best_move[1]
        self.assertEqual([row, col], [0, 0],
                         "Doesn't chose move that prevent a loss and win")

        # Checks if chooses move to prevent loss and tie
        player = ComputerPlayer(0, "cpu", "X")
        board = [["-", "-", "-"], ["-", "O", "X"], ["-", "X", "O"]]
        best_move = self._best_move(player, board)
        row, col = best_move[0], best_move[1]
        self.assertEqual([row, col], [0, 0],
                         "Doesn't chose move that prevent a loss")

        # Check if can see a few moves ahead and prevent opponent from
        # forcing a victory
        player = ComputerPlayer(1, "cpu", "O")
        board = [["-", "-", "X"], ["-", "O", "-"], ["X", "-", "-"]]
        best_move = self._best_move(player, board)
        row, col = best_move[0], best_move[1]
        self.assertNotEqual(
            [row, col], [0, 0],
            "Chose move that allows opponent to force a victory")
        self.assertNotEqual(
            [row, col], [2, 2],
            "Chose move that allows opponent to force a victory")
def ask_to_play_again():
    """
    Ask the user if they would like to play again.

    Keeps prompting until the answer is "yes" or "no". Answers are matched
    ignoring case and surrounding whitespace. The retry is a loop rather than
    a recursive call, so any number of unrecognized answers is handled without
    growing the call stack.

    Returns:
        True if the user answered yes, False if the user answered no.
    """
    while True:
        response = input("\nWould you like to play again? Enter yes or no: ")
        answer = response.strip().lower()
        if answer == "yes":
            return True
        if answer == "no":
            return False
        print("Response not recognized. You must enter yes or no.")


if __name__ == "__main__":
    print("Welcome to Tic Tac Toe")
    playing = True
    game = Game()
    # Play full games until the user declines another round.
    while playing:
        game.reset()
        game.play_game()
        playing = ask_to_play_again()
    print("Exiting Tic Tac Toe. Thanks for playing!")