def tour():
    pathAtt = './HistoryLog/Go/'
    Rcand = {
        'R1_10': [pathAtt + 'R_Ver1_checkpoint/7/', 'checkpoint_11.pth.tar'],
        'R1_40': [pathAtt + 'R_Ver1_checkpoint/7/', 'checkpoint_47.pth.tar'],
        'R1_B': [pathAtt + 'R_Ver1_checkpoint/7/', 'best.pth.tar'],
        'R2_B': [pathAtt + 'R_Ver2_checkpoint/7/', 'best.pth.tar'],
        'R3_B': [pathAtt + 'R_Ver3_checkpoint/7/', 'best.pth.tar']
    }
    Ccand = {
        'C_10': [pathAtt + 'C_Ver1_checkpoint/7/', 'checkpoint_6.pth.tar'],
        'C_40': [pathAtt + 'C_Ver1_checkpoint/7/', 'checkpoint_40.pth.tar'],
        'C_B': [pathAtt + 'C_Ver1_checkpoint/7/', 'best.pth.tar']
    }
    compares = [('R1_10', 'C_10'), ('R1_40', 'C_40'), ('R1_B', 'C_B'),
                ('R2_B', 'C_B'), ('R3_B', 'C_B'), ('R1_B', 'R2_B'),
                ('R1_B', 'R3_B'), ('R2_B', 'R3_B')]
    res = []
    # Only the first pairing is run here; iterate over `compares` instead to
    # run the full tournament.
    for c in [('R1_10', 'C_10')]:
        print(c)
        p1type = 'RES' if c[0][0] == 'R' else 'CNN'
        p2type = 'RES' if c[1][0] == 'R' else 'CNN'
        p1checkpoint = Rcand[c[0]] if c[0][0] == 'R' else Ccand[c[0]]
        p2checkpoint = Rcand[c[1]] if c[1][0] == 'R' else Ccand[c[1]]
        print(p1type, p2type)
        print(p1checkpoint, p2checkpoint)

        Net1 = nn(g, t=p1type)
        Net1.load_checkpoint(p1checkpoint[0], p1checkpoint[1])
        Args1 = dotdict({'numMCTSSims': 3000, 'cpuct': 17.5})
        MCTS1 = MCTS(g, Net1, Args1)
        Player1 = lambda x: np.argmax(MCTS1.getActionProb(x, temp=0))

        Net2 = nn(g, t=p2type)
        Net2.load_checkpoint(p2checkpoint[0], p2checkpoint[1])
        # Residual nets get the deep-search settings, plain CNNs the lighter
        # ones. (The original condition tested 'RNN', which can never match
        # since p2type is always 'RES' or 'CNN'.)
        Args2 = dotdict({
            'numMCTSSims': 3000 if p2type == 'RES' else 250,
            'cpuct': 17.5 if p2type == 'RES' else 3.0
        })
        MCTS2 = MCTS(g, Net2, Args2)
        Player2 = lambda x: np.argmax(MCTS2.getActionProb(x, temp=0))

        arena = Arena.Arena(Player1, Player2, g, display=display)
        _res = arena.playGames(10, verbose=True)
        res.append(_res)

    result = {'1win': [], '2win': [], 'draw': []}
    for r in res:
        result['1win'].append(r[0])
        result['2win'].append(r[1])
        result['draw'].append(r[2])
    pd.DataFrame(data=result).to_csv('result.csv')
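# These scripts rely on `dotdict` from utils for attribute-style access to
# hyperparameters (e.g. args.numMCTSSims). utils is not shown in this section;
# a minimal sketch, assuming the common alpha-zero-general implementation:
class dotdict(dict):
    def __getattr__(self, name):
        return self[name]

# Usage: args = dotdict({'numMCTSSims': 3000, 'cpuct': 17.5}); args.cpuct -> 17.5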
import os
import sys
from collections import deque
from pickle import Pickler, Unpickler
from random import shuffle

import numpy as np
from tqdm import tqdm

from GoMCTS import MCTS  # assumed local module path, matching the GTP scripts below


class Coach():
    """
    This class executes the self-play + learning. It uses the functions defined
    in Game and NeuralNet. args are specified in main.py.
    """

    def __init__(self, game, nnet, args):
        self.game = game
        self.nnet = nnet
        self.pnet = self.nnet.__class__(self.game)  # the competitor network
        self.args = args
        self.mcts = MCTS(self.game, self.nnet, self.args)
        self.trainExamplesHistory = []  # history of examples from args.numItersForTrainExamplesHistory latest iterations
        self.skipFirstSelfPlay = False  # can be overridden in loadTrainExamples()
        self.temp = 0

    def executeEpisode(self):
        """
        This function executes one episode of self-play, starting with player 1.
        As the game is played, each turn is added as a training example to
        trainExamples. The game is played till the game ends. After the game
        ends, the outcome of the game is used to assign values to each example
        in trainExamples.

        It uses temp=1 if episodeStep < tempThreshold, and thereafter uses
        temp=0.

        Returns:
            trainExamples: a list of examples of the form (canonicalBoard, pi, v)
                           pi is the MCTS informed policy vector, v is +1 if
                           the player eventually won the game, else -1.
        """
        trainExamples = []
        board = self.game.getInitBoard()
        self.curPlayer = 1
        episodeStep = 0

        while True:
            episodeStep += 1
            canonicalBoard = self.game.getCanonicalForm(board, self.curPlayer)
            temp = int(episodeStep < self.args.tempThreshold)

            pi = self.mcts.getActionProb(canonicalBoard, temp=temp)
            sym = self.game.getSymmetries(canonicalBoard, pi)
            for b, p in sym:
                trainExamples.append([b, self.curPlayer, p, None])

            action = np.random.choice(len(pi), p=pi)
            board, self.curPlayer = self.game.getNextState(
                board, self.curPlayer, action)

            r = self.game.getGameEnded(board, self.curPlayer)
            if r != 0:
                return [(x[0], x[2], r * ((-1)**(x[1] != self.curPlayer)))
                        for x in trainExamples]

    def bot_play(self, board):
        temp = 0
        pi = self.mcts.getActionProb(board, temp=temp)
        action = np.random.choice(len(pi), p=pi)
        board, self.curPlayer = self.game.getNextState(
            board, self.curPlayer, action)

    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration; skip self-play on the first iteration
            # only when examples were loaded from disk
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                for _ in tqdm(range(self.args.numEps), desc='self play'):
                    self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the
            # previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            self.nnet.train(trainExamples)
            # self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')

    def getCheckpointFile(self, iteration):
        # return str(iteration) + 'best.pth.tar'
        return 'best.pth.tar'

    def saveTrainExamples(self, iteration):
        folder = self.args.checkpoint
        if not os.path.exists(folder):
            os.makedirs(folder)
        # filename = os.path.join(folder, self.getCheckpointFile(iteration) + ".examples")
        filename = os.path.join(folder, "best.pth.tar.examples")
        with open(filename, "wb+") as f:
            Pickler(f).dump(self.trainExamplesHistory)

    def loadTrainExamples(self):
        modelFile = os.path.join(self.args.load_folder_file[0],
                                 self.args.load_folder_file[1])
        examplesFile = modelFile + ".examples"
        if not os.path.isfile(examplesFile):
            print(examplesFile)
            r = input("File with trainExamples not found. Continue? [y|n]")
            if r != "y":
                sys.exit()
        else:
            print("File with trainExamples found. Read it.")
            with open(examplesFile, "rb") as f:
                self.trainExamplesHistory = Unpickler(f).load()
            # examples based on the model were already collected (loaded)
            self.skipFirstSelfPlay = True
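# A small, self-contained illustration (not part of the Coach class) of the
# value assignment executeEpisode performs when the game ends: each stored
# example gets +1 if its player matched the eventual winner, else -1. Here
# `r` is getGameEnded's result from the final player's perspective.
def assign_values(trainExamples, r, curPlayer):
    # x = [board, player, pi, None]; v = r if x's player is curPlayer, else -r
    return [(x[0], x[2], r * ((-1)**(x[1] != curPlayer))) for x in trainExamples]

# e.g. examples = [['b0', 1, 'pi0', None], ['b1', -1, 'pi1', None]]
# assign_values(examples, r=1, curPlayer=1) -> [('b0', 'pi0', 1), ('b1', 'pi1', -1)]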
import os
import sys
import time
from collections import deque
from pickle import Pickler, Unpickler
from random import shuffle

import numpy as np
import pandas as pd

# Assumed project-local imports (module paths are assumptions):
from GoMCTS import MCTS
from Arena import Arena
from Go.GoGame import display
from pytorch_classification.utils import Bar, AverageMeter


class Coach():
    """
    This class executes the self-play + learning. It uses the functions defined
    in Game and NeuralNet. args are specified in main.py.
    """

    def __init__(self, game, nnet, args, log=False, logPath=''):
        self.game = game
        self.nnet = nnet
        self.pnet = self.nnet.__class__(
            self.game, t=self.nnet.netType)  # the competitor network
        self.args = args
        self.mcts = MCTS(self.game, self.nnet, self.args)
        self.trainExamplesHistory = []  # history of examples from args.numItersForTrainExamplesHistory latest iterations
        self.skipFirstSelfPlay = False  # can be overridden in loadTrainExamples()
        self.display = args['display']
        self.keepLog = log
        self.logPath = logPath

    def executeEpisode(self):
        """
        This function executes one episode of self-play, starting with player 1.
        As the game is played, each turn is added as a training example to
        trainExamples. The game is played till the game ends. After the game
        ends, the outcome of the game is used to assign values to each example
        in trainExamples.

        It uses temp=1 if episodeStep < tempThreshold, and thereafter uses
        temp=0.

        Returns:
            trainExamples: a list of examples of the form (canonicalBoard, pi, v)
                           pi is the MCTS informed policy vector, v is +1 if
                           the player eventually won the game, else -1.
        """
        trainExamples = []
        board = self.game.getInitBoard()
        self.curPlayer = 1
        episodeStep = 0

        while True:
            episodeStep += 1
            if self.display == 2:
                print("================Episode step:{}=====CURPLAYER:{}=========="
                      .format(episodeStep,
                              "White" if self.curPlayer == -1 else "Black"))
            canonicalBoard = self.game.getCanonicalForm(board, self.curPlayer)
            temp = int(episodeStep < self.args.tempThreshold)

            pi = self.mcts.getActionProb(canonicalBoard, temp=temp)
            sym = self.game.getSymmetries(canonicalBoard, pi)
            for b, p in sym:
                trainExamples.append([b, self.curPlayer, p, None])

            action = np.random.choice(len(pi), p=pi)
            board, self.curPlayer = self.game.getNextState(
                board, self.curPlayer, action)
            if self.display == 2:
                print("BOARD updated:")
                display(board)

            r = self.game.getGameEnded(board.copy(), self.curPlayer,
                                       returnScore=False)
            if r != 0:
                if self.display == 2:
                    # fetch the final score only for display purposes
                    score = self.game.getScore(board)
                    print("Current episode ends, {} wins with score :B:{};W:{}."
                          .format('Black' if r == 1 else 'White',
                                  score[0], score[1]))
                return [(x[0], x[2], r * ((-1)**(x[1] != self.curPlayer)))
                        for x in trainExamples]

    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        iterHistory = {'ITER': [], 'ITER_DETAIL': [], 'PITT_RESULT': []}

        for i in range(1, self.args.numIters + 1):
            iterHistory['ITER'].append(i)
            # bookkeeping
            print('###########################ITER:{}###########################'
                  .format(str(i)))
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                if self.display == 1:
                    bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    # print("{}th Episode:".format(eps + 1))
                    self.mcts = MCTS(self.game, self.nnet, self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    if self.display == 1:
                        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                            eps=eps + 1,
                            maxeps=self.args.numEps,
                            et=eps_time.avg,
                            total=bar.elapsed_td,
                            eta=bar.eta_td)
                        bar.next()
                if self.display == 1:
                    bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                # print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the
            # previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            trainLog = self.nnet.train(trainExamples)
            if self.keepLog:
                trainLog.to_csv(self.logPath + 'ITER_{}_TRAIN_LOG.csv'.format(i))
            iterHistory['ITER_DETAIL'].append(
                self.logPath + 'ITER_{}_TRAIN_LOG.csv'.format(i))

            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                iterHistory['PITT_RESULT'].append('R')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                iterHistory['PITT_RESULT'].append('A')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')

        pd.DataFrame(data=iterHistory).to_csv(self.logPath + 'ITER_LOG.csv')

    def getCheckpointFile(self, iteration):
        return 'checkpoint_' + str(iteration) + '.pth.tar'

    def saveTrainExamples(self, iteration):
        folder = self.args.checkpoint
        if not os.path.exists(folder):
            os.makedirs(folder)
        filename = os.path.join(folder,
                                self.getCheckpointFile(iteration) + ".examples")
        with open(filename, "wb+") as f:
            Pickler(f).dump(self.trainExamplesHistory)

    def loadTrainExamples(self):
        modelFile = os.path.join(self.args.load_folder_file[0],
                                 self.args.load_folder_file[1])
        examplesFile = modelFile + ".examples"
        if not os.path.isfile(examplesFile):
            print(examplesFile)
            r = input("File with trainExamples not found. Continue? [y|n]")
            if r != "y":
                sys.exit()
        else:
            print("File with trainExamples found. Read it.")
            with open(examplesFile, "rb") as f:
                self.trainExamplesHistory = Unpickler(f).load()
            # examples based on the model were already collected (loaded)
            self.skipFirstSelfPlay = True
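# A minimal driver sketch showing how this Coach is wired together, mirroring
# what the docstring says main.py does. The hyperparameter values below are
# illustrative assumptions, not the project's tuned settings; it also assumes
# NNetWrapper records its type as nnet.netType, as __init__ above expects.
if __name__ == '__main__':
    g = Game(7)                     # 7x7 Go, as used by the checkpoints above
    net = nn(g, t='RES')
    args = dotdict({
        'numIters': 10,
        'numEps': 25,
        'tempThreshold': 15,
        'updateThreshold': 0.55,
        'maxlenOfQueue': 200000,
        'numMCTSSims': 250,
        'arenaCompare': 10,
        'cpuct': 3.0,
        'checkpoint': './temp/',
        'load_folder_file': ('./temp/', 'best.pth.tar'),
        'numItersForTrainExamplesHistory': 20,
        'display': 0,
    })
    coach = Coach(g, net, args, log=True, logPath='./temp/')
    coach.learn()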
from GoMCTS import MCTS
from Go.GoGame import GoGame, display  # assumed source of the display helper
from Go.keras.NNet import NNetWrapper as NNet
# Assumed module paths for the player and arena helpers used below:
from Go.GoPlayers import HumanOthelloPlayer, RandomPlayer, GreedyOthelloPlayer
import ArenaHP
import numpy as np
from utils import *
"""
use this script to play any two agents against each other, or play manually
with any agent.
"""
g = GoGame(9)

hp = HumanOthelloPlayer(g)
t = hp.choose_turn()
hp.t = t  # 1 means the human player moves first
g.t = t

# all players
rp = RandomPlayer(g).play
gp = GreedyOthelloPlayer(g).play

n2 = NNet(g)
n2.load_checkpoint('./temp/', 'best.pth.tar')
args2 = dotdict({'numMCTSSims': 25, 'cpuct': 1.0})
mcts2 = MCTS(g, n2, args2)
n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))

arena = ArenaHP.Arena(n2p, hp, g, display=display)
arena.t = t
print(arena.playGame(verbose=True))
import numpy as np
from GoMCTS import MCTS
from Go.GoGame import GoGame as Game
from Go.keras.NNet import NNetWrapper as nn
from utils import *
from gtp import BLACK, WHITE, PASS, RESIGN
from gtp import gtp_boolean, gtp_list, gtp_color, gtp_vertex, gtp_move, parse_vertex
import gtp as gtp_lib
import re, sys

n = 9
game = Game(n)
nnet = nn(game)
nnet.load_checkpoint('./temp/', 'best.pth.tar')
args = dotdict({'numMCTSSims': 20, 'cpuct': 1.0})
mcts = MCTS(game, nnet, args)
board = game.getInitBoard()


def translate_gtp_colors(gtp_color):
    if gtp_color == BLACK:
        return board.BLACK
    elif gtp_color == WHITE:
        return board.WHITE
    else:
        return board.EMPTY


class GtpInterface(object):
    def __init__(self):
g = game(BoardSize)

# all players
rp = RandomPlayer(g).play
gp = GreedyGoPlayer(g).play
hp = HumanGoPlayer(g).play

# nnet players
NetType = 'CNN'
ResNet = nn(g, t='RES')
ResNet.load_checkpoint(
    './HistoryLog/Go/R_Ver2_checkpoint/{}/'.format(BoardSize),
    'RVer2.best.pth.tar')
ResArgs = dotdict({'numMCTSSims': 3000, 'cpuct': 17.0})
ResMCTS = MCTS(g, ResNet, ResArgs)
ResPlayer = lambda x: np.argmax(ResMCTS.getActionProb(x, temp=0))

CNN = nn(g, t='CNN')
CNN.load_checkpoint('./HistoryLog/Go/C_checkpoint/{}/'.format(BoardSize),
                    'checkpoint_4.pth.tar')
CNNArgs = dotdict({'numMCTSSims': 250, 'cpuct': 3.0})
CNNMCTS = MCTS(g, CNN, CNNArgs)
CNNPlayer = lambda x: np.argmax(CNNMCTS.getActionProb(x, temp=0))

arena = Arena.Arena(ResPlayer, CNNPlayer, g, display=display)
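# With the arena built above, a short head-to-head match can be run directly;
# playGames returns (oneWon, twoWon, draws), as unpacked in Coach.learn above:
# oneWon, twoWon, draws = arena.playGames(10, verbose=True)
# print('RES/CNN WINS : %d / %d ; DRAWS : %d' % (oneWon, twoWon, draws))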
from GoMCTS import MCTS
import numpy as np
from Go.GoGame import GoGame as Game
from Go.keras.NNet import NNetWrapper as nn
from utils import *
from gtp import BLACK, WHITE, PASS
from gtp import gtp_boolean, gtp_list, gtp_color, gtp_vertex, gtp_move, parse_vertex
import re

n = 9
game = Game(n)
nnet = nn(game)
nnet.load_checkpoint('./temp/', 'best.pth.tar')
args = dotdict({'numMCTSSims': 20, 'cpuct': 1.0})
mcts = MCTS(game, nnet, args)
board = game.getInitBoard()

# time.sleep(random.uniform(0.1, 1.0))  # simulates process time


def play(vertex, color, board):
    if int(vertex[0]) == 0 and int(vertex[1]) == 0:
        board = board  # (0, 0) is the GTP PASS vertex; leave the board unchanged
    else:
        # GTP vertices are 1-indexed with the y-axis counted from the bottom;
        # convert to the 0-indexed, top-origin board coordinates.
        vertex = (int(vertex[0]) - 1, 8 - (int(vertex[1]) - 1))
        action = vertex[1] * 9 + vertex[0]
        board, curPlayer = game.getNextState(board, color, action)
    return board
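# Worked example of the coordinate conversion in play(): GTP vertex (3, 2)
# becomes 0-indexed (2, 1), the y-coordinate flips to 8 - 1 = 7, and the
# flattened action index is 7 * 9 + 2 = 65.
# board = play((3, 2), BLACK, board)   # Black plays at GTP (3, 2)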
class InterGame(object):
    def __init__(self, NetType='ResNet'):
        self.game = game(BoardSize)
        self.board = self.game.getInitBoard()
        self.n = self.game.getBoardSize()[0]
        self.players = [self.AlphaPlay, None, self.HumanPlay]
        self.curPlayer = 1
        self.gameStatus = 0
        if NetType == 'ResNet':
            self.AlphaNet = nn(self.game, t='RES')
            self.AlphaNet.load_checkpoint(
                '/home/zc1213/course/alphabackend/alphabrain/HistoryLog/Go/R_Ver2_checkpoint/{}/'
                .format(BoardSize), 'best.pth.tar')
            self.AlphaArgs = dotdict({'numMCTSSims': 2000, 'cpuct': 21.3})
        else:
            self.AlphaNet = nn(self.game, t='CNN')
            self.AlphaNet.load_checkpoint(
                '/home/zc1213/course/alphabackend/alphabrain/HistoryLog/Go/C_checkpoint/{}/'
                .format(BoardSize), 'best.pth.tar')
            self.AlphaArgs = dotdict({'numMCTSSims': 2000, 'cpuct': 17.3})
        self.AlphaMCTS = MCTS(self.game, self.AlphaNet, self.AlphaArgs)
        self.Alpha = lambda x: np.argmax(
            self.AlphaMCTS.getActionProb(x, temp=0))
        self.alphaMoveCache = {}

    def initialize(self):
        self.board = self.game.getInitBoard()
        self.alphaMoveCache = {}
        return True

    def getScore(self):
        return self.game.getScore(self.board)

    def judgeGame(self):
        self.gameStatus = self.game.getGameEnded(self.board, self.curPlayer)
        if self.gameStatus == -1:
            print("player 1 lost.")
            return -1
        elif self.gameStatus == 1:
            print("player 1 won.")
            return 1
        else:
            print("game continues.")
            return 0

    def getAlphaPlayFromCache(self, humanMove):
        if humanMove in self.alphaMoveCache:
            print("already cached, get from cache")
            return self.alphaMoveCache[humanMove]
        else:
            print("new request, come back later")
            self.alphaMoveCache = {}
            self.alphaMoveCache.update({humanMove: self.AlphaPlay()})
            return self.alphaMoveCache[humanMove]

    def AlphaPlay(self, *move):
        assert self.judgeGame() == 0
        action = self.Alpha(
            self.game.getCanonicalForm(self.board, self.curPlayer))
        valids = self.game.getValidMoves(
            self.game.getCanonicalForm(self.board, self.curPlayer), 1)
        if valids[action] == 0:
            print(action)
            assert valids[action] > 0
        self.board, self.curPlayer = self.game.getNextState(
            self.board, self.curPlayer, action)
        alphaMove = (int(action / self.n), int(action % self.n))
        return alphaMove

    def HumanPlay(self, move):
        assert self.judgeGame() == 0
        x, y = [int(v) for v in move]
        valids = self.game.getValidMoves(
            self.game.getCanonicalForm(self.board, self.curPlayer), 1)
        # x == -1 encodes a pass, mapped to the last action index n**2
        action = self.game.n * x + y if x != -1 else self.game.n ** 2
        if valids[action] == 0:
            print("Invalid Move!")
            return
        self.board, self.curPlayer = self.game.getNextState(
            self.board, self.curPlayer, action)
        return

    def showBoard(self):
        display(self.board)
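# A minimal interaction sketch for InterGame (hypothetical moves; assumes the
# module-level `game`, `BoardSize`, `nn`, `MCTS`, and `display` are in scope):
# ig = InterGame(NetType='ResNet')
# ig.HumanPlay((2, 3))                     # human plays at row 2, col 3
# print(ig.getAlphaPlayFromCache((2, 3)))  # engine reply, cached per request
# ig.showBoard()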