class MCTSPlayer:
    """Player that chooses its move with a Monte Carlo tree search.

    Each call to ``move`` runs ``nPlay`` select / expand / simulate /
    backpropagate iterations on a tree of ``Node`` objects, then steps the
    stored root to the chosen child so the tree can be reused on later calls.
    """

    def __init__(self, nPlay, maxPlies, bNegamax, cUct=1 / np.sqrt(2), bDump=False):
        """Store the search settings and build the helper objects.

        nPlay    -- number of search iterations per call to ``move``
        maxPlies -- forwarded to ``Node`` when a fresh root is created
        bNegamax -- selects ``UCTNegamax`` instead of ``UCT`` as tree policy
        cUct     -- constant handed to the tree policy
        bDump    -- when true, the root is dumped after every search
        """
        self._nPlay = nPlay
        self._maxPlies = maxPlies
        self._uct = UCTNegamax(cUct) if bNegamax else UCT(cUct)
        self._cUct = cUct
        self._bNegamax = bNegamax
        self._bDump = bDump
        # Policy built with constant 0; used only to pick the move to play.
        self._uctMove = UCT(0)
        self._rp = RandomPlayer()
        self._nprand = np.random.RandomState()
        self._root = None

    def __str__(self):
        fields = (self.__class__.__name__, self._nPlay, self._maxPlies,
                  self._bNegamax, self._cUct)
        return "%s nPlay = %d maxPlies = %d bNegamax = %s cUct = %.4f" % fields

    def _simulate(self, node):
        """Play out a clone of ``node``'s board with the random player.

        "A simulation is run from the new node(s) according to the default
        policy to produce an outcome."  The first element of the
        ``play.playRest`` result is returned.
        """
        board = node.ttt.clone()
        result = play.playRest(self._rp, self._rp, board, False, 99999)
        return result[0]

    def setSeed(self, seed):
        """Seed the numpy generator with ``seed`` and the random player with ``seed + 1``."""
        self._nprand.seed(seed)
        self._rp.setSeed(seed + 1)

    def move(self, ttt):
        """Search from position ``ttt`` and return the selected move.

        The previous tree is reused when ``findBoard`` locates ``ttt`` in it;
        otherwise a new root ``Node`` is created. A playout scores 1 when its
        result equals the side to move in ``ttt``, .5 when it equals
        ``game.Draw`` and 0 otherwise.
        """
        if self._root is not None:
            self._root = self._root.findBoard(ttt)
        if self._root is None:
            self._root = Node(self._nprand, ttt, 1, maxPlies=self._maxPlies)
        root = self._root

        # Only referenced by optional debug tracing; the query is kept as-is.
        marker = ttt.whoseTurn()

        for _ in range(self._nPlay):
            leaf = root.select(self._uct)
            if leaf is None:
                continue
            fresh = leaf.expand()
            if fresh is None:
                continue

            winner = self._simulate(fresh)
            if winner == ttt.whoseTurn():
                reward = 1
            elif winner == game.Draw:
                reward = .5
            else:
                reward = 0
            fresh.backpropagate(reward)

        if self._bDump:
            root.dump()

        # Step into the chosen child so the next call can reuse its subtree.
        self._root = root.bestChild(self._uctMove)
        return self._root.move

    def tests(self):
        """Run the tree's parentage check on the current root."""
        self._root.check_parentage()
class OmniscientAdversary:
    """Player that rates each legal move by simulated games and plays a best one.

    Every candidate move is scored as the mean of ``nPlay`` results of
    ``play.simGame``, in which one side is the internal random player and the
    other is the player supplied through ``reconfigure``. ``reconfigure`` must
    be called before ``move``, because ``_nn`` is only set there.
    """

    def __init__(self, nPlay):
        """Create the helper players/generators; ``nPlay`` is the number of
        simulated games used to score each candidate move."""
        self._rp = RandomPlayer()
        self._rand = random.Random()
        # Qualities closer than this to the best one count as ties.
        self._epsSame = 1e-6
        self._nPlay = nPlay

    def __str__(self):
        return "%s nPlay = %d" % (self.__class__.__name__, self._nPlay)

    def reconfigure(self, nn):
        """Set the player that the simulations are run against."""
        self._nn = nn

    def setSeed(self, seed):
        """Seed the random player with ``seed`` and the tie-break generator
        with ``seed + 1``; ``None`` is forwarded to both unchanged."""
        if seed is None:
            self._rp.setSeed(None)
            self._rand.seed(None)
        else:
            self._rp.setSeed(seed)
            self._rand.seed(seed + 1)

    def move(self, ttt):
        """Return a highest-quality valid move for position ``ttt``.

        Moves whose quality is within ``_epsSame`` of the best are treated as
        equal and one of them is drawn with the instance generator, so the
        choice is reproducible after ``setSeed``.

        Raises IndexError when no move qualifies (e.g. no valid moves).
        """
        vm = ttt.validMoves()
        qs = [self._moveQuality(ttt, m) for m in vm]

        bestQ = -1e99
        for q in qs:
            if q > bestQ:
                bestQ = q

        bestMoves = [m for m, q in zip(vm, qs) if abs(q - bestQ) < self._epsSame]
        # Fix: draw from the seeded per-instance generator. The module-level
        # random.choice ignored setSeed entirely.
        return self._rand.choice(bestMoves)

    def xx_move(self, ttt):
        """Alternative selection: sample a move with probability proportional
        to its quality shifted so that the minimum maps to 1e-6.

        NOTE(review): not called from within this class, and it samples from
        numpy's global generator, so ``setSeed`` does not affect it.
        """
        vm = ttt.validMoves()
        qs = np.array([self._moveQuality(ttt, m) for m in vm])
        pMove = qs - qs.min() + 1e-6
        pMove /= pMove.sum()
        return np.random.choice(vm, p=pMove)

    def _moveQuality(self, ttt, m):
        """Return the mean ``play.simGame`` result over ``nPlay`` games for move ``m``.

        When ``ttt.whoseTurn()`` equals ``game.X`` the random player is passed
        as X and the configured player as O; otherwise the roles are swapped.
        """
        if ttt.whoseTurn() == game.X:
            pX, pO = self._rp, self._nn
        else:
            pX, pO = self._nn, self._rp
        scores = [play.simGame(pX, pO, ttt, m) for _ in range(self._nPlay)]
        return np.array(scores).mean()