import time from tictactoe import TicTacToe from player import Player from ai_player import AIPlayer p1 = Player() p2 = AIPlayer() # Train with 10000 games against itself. print('Learning...') t = time.time() for _ in range(10000): game = TicTacToe(p2, p2) game.start() print('Learned {} unique states from {:.0f} games in {:.2f} seconds'.format( len(p2.states['x']) + len(p2.states['o']), p2.games_played() / 2, time.time() - t)) # Only perform best moves. p2.epsilon = 0 # Play against human player. while True: game = TicTacToe(p1, p2) game.start()
p1 = AIPlayer() p2 = SoftmaxPlayer() # Train e-greedy player against itself. for _ in range(2000): game = TicTacToe(p1, p1) game.start() # Train softmax player against itself. for _ in range(2000): game = TicTacToe(p2, p2) game.start() # Set parameters so that optimal moves are preferred. p1.epsilon = 0 p2.theta = 1 # Remove stats from training matches. p1.draws = 0 p1.wins = 0 p1.losses = 0 p2.draws = 0 p2.wins = 0 p2.losses = 0 # E-greedy vs softmax. for _ in range(100): game = TicTacToe(p1, p2) game.start()