示例#1
0
class MCTSPlayer:
    """Player that chooses moves with Monte Carlo tree search.

    Every call to move() runs ``nPlay`` select / expand / simulate /
    backpropagate iterations on a search tree and then steps to the child
    picked by a ``UCT(0)`` policy.  The tree is kept between calls: the next
    call first looks for the new board below the current root via
    ``findBoard`` and only builds a fresh root when that lookup yields None.

    Args:
        nPlay: Number of search iterations performed per move.
        maxPlies: Forwarded as ``maxPlies`` to every newly created root Node.
        bNegamax: If true, tree selection uses ``UCTNegamax(cUct)``,
            otherwise ``UCT(cUct)``.
        cUct: Constant handed to the selection policy.
        bDump: If true, ``dump()`` is called on the root after each search.
    """

    def __init__(self,
                 nPlay,
                 maxPlies,
                 bNegamax,
                 cUct=1 / np.sqrt(2),
                 bDump=False):
        self._nPlay = nPlay
        self._maxPlies = maxPlies
        if bNegamax:
            self._uct = UCTNegamax(cUct)
        else:
            self._uct = UCT(cUct)
        self._cUct = cUct
        self._bNegamax = bNegamax
        self._bDump = bDump
        # Policy used only for the final move choice (constant 0).
        self._uctMove = UCT(0)
        # Default policy for rollouts; plays both sides in _simulate().
        self._rp = RandomPlayer()
        self._nprand = np.random.RandomState()

        # Root of the search tree; created lazily by move().
        self._root = None

    def __str__(self):
        return ("%s nPlay = %d maxPlies = %d bNegamax = %s cUct = %.4f" %
                (self.__class__.__name__, self._nPlay, self._maxPlies,
                 self._bNegamax, self._cUct))

    def _simulate(self, node):
        """Play out a clone of the node's board with the random player.

        "A simulation is run from the new node(s) according to the
        default policy to produce an outcome."

        Returns:
            The first element of play.playRest()'s result; move() compares
            it with a player marker and with game.Draw.
        """
        # NOTE(review): the meaning of the trailing False / 99999 arguments
        # is defined by play.playRest and is not visible here.
        return play.playRest(self._rp, self._rp, node.ttt.clone(), False,
                             99999)[0]

    def setSeed(self, seed):
        """Seed the tree RNG with ``seed`` and the rollout player with seed + 1.

        ``None`` is passed through to both generators instead of failing on
        ``None + 1``.
        """
        self._nprand.seed(seed)
        self._rp.setSeed(None if seed is None else seed + 1)

    def move(self, ttt):
        """Search from board ``ttt`` and return the move of the chosen child.

        Side effect: the stored root becomes the chosen child, so the tree
        below it can be reused on the next call.
        """
        if self._root is not None:
            # Try to reuse the part of the tree that matches the new board.
            self._root = self._root.findBoard(ttt)

        if self._root is None:
            self._root = Node(self._nprand, ttt, 1, maxPlies=self._maxPlies)

        # Rollouts are scored from the point of view of the player to move
        # on the board we were asked about.
        marker = ttt.whoseTurn()
        for _ in range(self._nPlay):
            nodeLeaf = self._root.select(self._uct)
            if nodeLeaf is None:
                continue
            nodeSim = nodeLeaf.expand()
            if nodeSim is None:
                continue

            w = self._simulate(nodeSim)
            if w == marker:
                score = 1
            elif w == game.Draw:
                score = .5
            else:
                score = 0
            nodeSim.backpropagate(score)

        if self._bDump:
            self._root.dump()
        self._root = self._root.bestChild(self._uctMove)
        return self._root.move

    def tests(self):
        """Run the tree's parentage check; does nothing if no tree exists."""
        if self._root is not None:
            self._root.check_parentage()
示例#2
0
class OmniscientAdversary:
    """Player that rates every valid move by simulated games and plays one
    of the top-rated moves.

    Each candidate move is scored as the mean of ``nPlay`` calls to
    ``play.simGame`` in which a RandomPlayer sits on the side to move and
    the player supplied through reconfigure() sits on the other side.

    Args:
        nPlay: Number of simulated games per candidate move.
    """

    def __init__(self, nPlay):
        self._rp = RandomPlayer()
        # Private RNG used to break ties between equally rated moves.
        self._rand = random.Random()
        # Qualities closer than this to the best one count as tied.
        self._epsSame = 1e-6
        self._nPlay = nPlay
        # Opponent used in simulations; must be set with reconfigure().
        self._nn = None

    def __str__(self):
        return "%s nPlay = %d" % (self.__class__.__name__, self._nPlay)

    def reconfigure(self, nn):
        """Set the player that simulations are run against."""
        self._nn = nn

    def setSeed(self, seed):
        """Seed the random player with ``seed`` and the tie-break RNG with
        ``seed + 1``; ``None`` is passed through to both."""
        if seed is None:
            self._rp.setSeed(None)
            self._rand.seed(None)
        else:
            self._rp.setSeed(seed)
            self._rand.seed(seed + 1)

    def move(self, ttt):
        """Return a valid move whose quality is within ``_epsSame`` of the best.

        Ties are broken with the instance RNG so that setSeed() makes the
        choice reproducible.

        Raises:
            IndexError: If no move qualifies (for example, no valid moves).
        """
        vm = ttt.validMoves()
        qs = [self._moveQuality(ttt, m) for m in vm]

        bestQ = -1e99
        for q in qs:
            if q > bestQ:
                bestQ = q

        bestMoves = [m for m, q in zip(vm, qs)
                     if abs(q - bestQ) < self._epsSame]
        return self._rand.choice(bestMoves)

    def xx_move(self, ttt):
        """Alternative policy: sample a move with probability proportional
        to its quality shifted so the worst move has weight 1e-6.

        NOTE(review): this draws from NumPy's global RNG, so setSeed() does
        not make it reproducible.
        """
        vm = ttt.validMoves()
        qs = np.array([self._moveQuality(ttt, m) for m in vm])
        pMove = qs - qs.min() + 1e-6
        pMove /= pMove.sum()
        return np.random.choice(vm, p=pMove)

    def _moveQuality(self, ttt, m):
        """Return the mean ``play.simGame`` result over ``nPlay`` games for
        move ``m`` on board ``ttt``.

        Raises:
            RuntimeError: If reconfigure() has not supplied an opponent.
        """
        if self._nn is None:
            raise RuntimeError(
                "reconfigure() must be called before evaluating moves")

        # The random player takes the side to move; the configured player
        # takes the other side.
        if ttt.whoseTurn() == game.X:
            pX = self._rp
            pO = self._nn
        else:
            pX = self._nn
            pO = self._rp

        scores = [play.simGame(pX, pO, ttt, m) for _ in range(self._nPlay)]
        return np.array(scores).mean()