def main():
    """Train an AlphaZero-style agent for TicTacToe.

    Builds the game, wraps it in the neural network, optionally restores a
    checkpoint plus its saved training examples, then runs the Coach's
    learning loop. Relies on module-level `log`, `args`, `TicTacToeGame`,
    `nn`, and `Coach`.
    """
    log.info('Loading %s...', TicTacToeGame.__name__)
    g = TicTacToeGame()

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        # Lazy %-style args match every other log call in this function and
        # defer string formatting until the record is actually emitted.
        log.info('Loading checkpoint "%s" ...', args.load_folder_file)
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
# Pit script: set up agents for 3x3 TicTacToe using pretrained keras nets.
import Arena
from MCTS import MCTS
from tictactoe.TicTacToeGame import TicTacToeGame, display
from tictactoe.TicTacToePlayers import *
from tictactoe.keras.NNet import NNetWrapper as NNet
import numpy as np
from utils import *

"""
use this script to play any two agents against each other, or play manually with any agent.
"""

g = TicTacToeGame(3)

# all players
rp = RandomPlayer(g).play
# gp = TicTacToePlayer(g).play
hp = HumanTicTacToePlayer(g).play

# nnet players
n1 = NNet(g)
n1.load_checkpoint('./pretrained_models/tictactoe/keras/','best-25eps-25sim-10epch.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct':1.0})
mcts1 = MCTS(g, n1, args1)
# Deterministic player: temp=0 makes getActionProb peak on the best move.
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

n2 = NNet(g)
# NOTE(review): '/dev/8x50x25/' looks like a placeholder path — confirm the
# second checkpoint's location. The chunk is truncated here; the second MCTS
# player and the Arena setup lie outside this view.
n2.load_checkpoint('/dev/8x50x25/','best.pth.tar')
def test_tictactoe_keras(self):
    """Smoke-test the keras TicTacToe net through the shared game-test harness."""
    game = TicTacToeGame()
    self.execute_game_test(game, TicTacToeKerasNNet)
from utils import *
from time import time
from joblib import Parallel, delayed
import multiprocessing


def experiment(m):
    """Benchmark a time-limited MCS agent against a random player.

    Plays 5 repetitions of a 100-game arena on the module-level game `g`,
    giving the MCS agent `m` microseconds per move, and returns one
    [m, rep, wins, losses, draws] row per repetition.
    """
    # Accumulate locally: the original appended to a module-level `data` list,
    # so repeated calls in one process would leak rows across time budgets
    # (masked only by joblib's per-process isolation).
    rows = []
    for rep in range(5):
        rp = RandomPlayer(g).play
        mcs = MCSAgent(g, nSims=100000000, time=m).play
        arena_rp_hp = Arena.Arena(mcs, rp, g, display=display)
        wins, loss, draw = arena_rp_hp.playGames(100, verbose=False)
        rows.append([m, rep, wins, loss, draw])
    return rows


print('Start Parallel')

# Per-move time budgets (microseconds) to sweep.
microsecs = np.array([5000, 10000, 50000, 100000, 250000, 500000,
                      750000, 1000000, 1500000, 2000000, 3000000])
games = [3, 4, 5]  # board sizes

for i in games:
    global_start = time()
    g = TicTacToeGame(i)
    # One parallel job per time budget; each job returns its own rows, so no
    # shared mutable state is needed.
    data = Parallel(n_jobs=11)(delayed(experiment)(m) for m in microsecs)
    np.save('tictactoe_results_' + str(i), data)
    print('Game: ' + str(i) + ' Time: ' + str(time() - global_start))
# Pit script for TicTacToe, adapted from the othello pit script — the
# commented-out lines preserve the othello variants it was copied from.
from tictactoe.TicTacToePlayers import *
from tictactoe.TicTacToeGame import TicTacToeGame,display
from tictactoe.TicTacToeLogic import Board
from tictactoe.keras.NNet import NNetWrapper as NNet
#from othello.pytorch.NNet import NNetWrapper as NNet
import numpy as np
from utils import *

"""
use this script to play any two agents against each other, or play manually with any agent.
"""

#g = OthelloGame(6)
g = TicTacToeGame(Board.SIZE)  # board size comes from the logic layer

# all players
rp = RandomPlayer(g).play
#gp = GreedyOthelloPlayer(g).play
#hp = HumanOthelloPlayer(g).play
hp = HumanTicTacToePlayer(g).play

# nnet players
n1 = NNet(g)
#n1.load_checkpoint('./pretrained_models/othello/pytorch/','6x100x25_best.pth.tar')
#n1.load_checkpoint('./pretrained_models/tictactoe/keras','best-25eps-25sim-10epch.pth.tar')
n1.load_checkpoint('./temp/4x4/','best.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct':1.0})
# NOTE(review): chunk is truncated — the MCTS import and the rest of the pit
# setup (players, Arena) lie outside this view.
mcts1 = MCTS(g, n1, args1)
# NOTE(review): this chunk begins mid-literal — the opening of this dict
# (presumably `model_params = dotdict({`) lies outside the visible source, as
# do the imports for pathlib, Coach, TicTacToeGame, keras_tictactoe_neuralnet
# and args_to_filename. Suite nesting below was reconstructed from context —
# confirm against the full file.
'tempThreshold': 15,      # NOTE(review): presumably moves before temp drops to 0 — confirm
'updateThreshold': 0.6,   # NOTE(review): presumably arena win fraction to accept new net — confirm
'maxlenOfQueue': 200000,
'numMCTSSims': 25,
'arenaCompare': 40,
'cpuct': 1,
}

# Checkpoint / reload configuration; args_to_filename appears to encode the
# model params into a per-configuration directory name.
loadSaveArgs = {'checkpoint': './checkpoints/tictactoe/keras/' + args_to_filename(model_params),
                'load_model': False,
                'load_folder_file': ('models/tictactoe/keras/' + args_to_filename(model_params), 'best.pth.tar'),
                'numItersForTrainExamplesHistory': 10}

# Merge hyper-parameters and I/O settings into one attribute-accessible dict.
args = dotdict({**model_params,**loadSaveArgs})

if __name__ == "__main__":
    game = TicTacToeGame(3)
    nnet = keras_tictactoe_neuralnet(game)
    c = Coach(game, nnet, args)
    # Ensure the checkpoint directory exists before training writes into it.
    pathlib.Path(args.checkpoint).mkdir(parents=True, exist_ok=True)
    if args.load_model:
        pathlib.Path(args.load_folder_file[0]).mkdir(parents=True, exist_ok=True)
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
import sys
import numpy as np
from utils import *

# Player-id conventions used by the game logic.
humanPlayer = -1
computerPlayer = 1

"""
use this script as the backend to the tictactoeaz.py front-end.
"""

def eprint(*args, **kwargs):
    # Write to stderr so stdout stays free for the front-end message protocol.
    print(*args, file=sys.stderr, **kwargs)

# NOTE(review): TicTacToeGame, NNet and MCTS are not imported in this chunk —
# their imports presumably lie outside the visible source.
game = TicTacToeGame()

# nnet players - The Computer Player
n1 = NNet(game)
n1.load_checkpoint('./pretrained_models/tictactoe/keras/', 'best-25eps-25sim-10epch.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(game, n1, args1)
# Deterministic computer move: temp=0 peaks getActionProb on the best action.
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))

board = game.getInitBoard()

# NOTE(review): the message loop is truncated here — the body of the
# `msgId == 0` branch (and any later branches) lies outside this chunk.
while True:
    eprint("Enter msg Id:")
    msgId = int(input())
    if msgId == 0:
# NOTE(review): this chunk starts mid-import-block — the imports that bind
# NNet, NNet1, display1, display2, TicTacToeGame, GobangGame, OthelloGame and
# the MinMax players lie outside the visible source.
from othello.tensorflow.NNet import NNetWrapper as NNet2
from connect4.Connect4Game import Connect4Game, display as display3
from connect4.Connect4Players import *
from connect4.tensorflow.NNet import NNetWrapper as NNet3
import numpy as np
from utils import *

"""
use this script to play any two agents against each other, or play manually with any agent.
"""

# Select which game's pretrained net and baseline opponent to set up below.
choice = "othello"

if choice == "tictactoe":
    g = TicTacToeGame(5)
    n1 = NNet(g)
    n1.load_checkpoint('./temp/', 'best75_eps95_dim5.pth.tar')
    display = display  # no-op self-assignment, keeps the generic name `display`
    hp = MinMaxTicTacToePlayer(g, 4).play

if choice == "gobang":
    g = GobangGame(6, 6)
    n1 = NNet1(g)
    n1.load_checkpoint('./temp/', 'temp:iter75:eps5:dim6.pth.tar')
    display = display1
    hp = MinMaxGobangPlayer(g, 6).play

if choice == "othello":
    g = OthelloGame(6)
    n1 = NNet2(g)
    n1.load_checkpoint('./temp/', 'best75:eps140:dim6.pth.tar')
    # NOTE(review): chunk is truncated — the othello branch sets no `hp`
    # within this view; the rest of the setup lies outside it.
    display = display2
def experiment(game):
    """Q-learning hyper-parameter sweep for TicTacToe on a `game`-sized board.

    For each learning rate in `lrs` and each epsilon schedule in
    `epsilon_config` ('f' = fixed epsilon, anything else = decaying), trains a
    QAgent in chunks of `ep_step` episodes and, after each chunk, measures its
    win rate against a random player over `reps` arena sessions of `n_games`
    games each. Per-configuration metrics and timings are written to .npy
    files named after (game, lr, schedule).

    Relies on module-level: n_episodes, lrs, epsilon_config, reps, n_games,
    QAgent, RandomPlayer, Arena, display, time, np.
    """
    np.random.seed(556)  # fixed seed for reproducible sweeps
    g = TicTacToeGame(game)

    # Episode budget and evaluation interval per board size. (The original
    # repeated the full ep_range construction in three copy-pasted branches
    # that differed only in these two values.)
    if game == 3:
        total_episodes, ep_step = n_episodes[0], 10000
    elif game == 4:
        total_episodes, ep_step = n_episodes[1], 20000
    else:
        total_episodes, ep_step = n_episodes[2], 28000
    ep_range = np.arange(0, total_episodes + ep_step, ep_step) + 1
    ep_range[0] = 0
    ep_range = ep_range.astype(int)

    for lr in lrs:
        for i in epsilon_config:
            print('Config: Game', game, 'lr', lr, 'epsilon', i)
            test_wr_list = []
            test_wr = []

            # 'f' = fixed exploration rate; otherwise decay from 1.0.
            if i == 'f':
                q_agent = QAgent(g, episodes=total_episodes, lr=lr,
                                 epsilon=0.2, dc=1, e_min=0.001,
                                 ep_arena=ep_step)
            else:
                q_agent = QAgent(g, episodes=total_episodes, lr=lr,
                                 epsilon=1, dc=0.99, e_min=0.001,
                                 ep_arena=ep_step)
            rp = RandomPlayer(g).play
            q_agent_play = q_agent.play

            start = time()
            for idx, episode in enumerate(ep_range):
                if episode == ep_range[-1]:
                    break  # last entry only marks the end of the final chunk
                # Progress banner for the upcoming training chunk.
                if episode == 0:
                    print('Training for Episodes ', 0, ' to ',
                          ep_range[idx + 1] - 1, '...', sep='')
                elif episode == ep_range[-2]:
                    print('Training for Episodes ', episode - 1, ' to ',
                          total_episodes, '...', sep='')
                else:
                    print('Training for Episodes ', episode - 1, ' to ',
                          ep_range[idx + 1] - 1, '...', sep='')
                q_agent.train(cur_episode=episode)
                print('Training Finished.')

                print('Playing in Arena...')
                wins = 0
                temp = []
                for repet in range(reps):
                    arena_rp_op = Arena.Arena(q_agent_play, rp, g, display=display)
                    w, _, _ = arena_rp_op.playGames(n_games, verbose=False)
                    temp.append(w / n_games)
                    wins += w
                test_wr_list.append(temp)
                test_wr.append(wins / (reps * n_games))
                print('\n')
            end = time()
            training_time = np.array([end - start])

            # Persist per-configuration metrics for offline analysis.
            np.save('train_wr_tictactoe_' + str(game) + '_' + str(lr) + '_' + str(i),
                    q_agent.total_wins)
            np.save('train_ep_tictactoe_' + str(game) + '_' + str(lr) + '_' + str(i),
                    q_agent.total_eps)
            np.save('test_wr_tictactoe_' + str(game) + '_' + str(lr) + '_' + str(i),
                    test_wr)
            np.save('test_wr_list_tictactoe_' + str(game) + '_' + str(lr) + '_' + str(i),
                    test_wr_list)
            np.save('training_time_' + str(game) + '_' + str(lr) + '_' + str(i),
                    training_time)
            print('\n')