Пример #1
0
    def deadPattern1(self, tempBoard, shortcutState):
        """Fill every empty cell that matches the first dead-cell pattern.

        For each occupied cell ``s`` (colour ``c1``) at index 4 or above, the
        pattern matches when all of the following hold:

        * the ``"u"`` neighbour of ``s`` also holds ``c1``;
        * the ``"ur"`` neighbour ``m`` of ``s`` is empty (0);
        * the ``"dr"`` neighbour of ``m`` holds ``nextPlayer(c1)``;
        * the ``"u"`` neighbour of that last cell also holds ``nextPlayer(c1)``.

        On a match, ``m`` is filled with ``nextPlayer(self.player)`` in
        ``tempBoard``, in ``self.deadCells`` and in ``shortcutState``.

        NOTE(review): the scan starts at index 4; presumably the first four
        indices are reserved (e.g. edge cells) -- confirm against the board
        layout.

        Returns:
            True if at least one cell was filled, otherwise False.
        """
        neighbour = self.state.dirAdj

        def holds(pos, colour):
            # dirAdj results of -1 are treated as "no such cell".
            return pos != -1 and tempBoard[pos] == colour

        changed = False
        for idx in range(4, len(tempBoard)):
            own = tempBoard[idx]
            if own == 0:
                continue
            other = nextPlayer(own)

            if not holds(neighbour(idx, "u"), own):
                continue

            gap = neighbour(idx, "ur")
            if not holds(gap, 0):
                continue

            flank = neighbour(gap, "dr")
            if not holds(flank, other):
                continue

            if not holds(neighbour(flank, "u"), other):
                continue

            # Pattern matched: the gap is filled in all three board views.
            # tempBoard is updated in place, so later iterations see the fill.
            filler = nextPlayer(self.player)
            tempBoard[gap] = filler
            self.deadCells[gap] = filler
            shortcutState.setHexIndex(gap, filler)
            changed = True

        return changed
Пример #2
0
    def rollout(self, state, player):
        """Descend once from ``state`` and back the result up into ``self.stateInfo``.

        ``self.stateInfo`` maps ``state.number()`` to a two-element list
        ``[visits, value]``. Each call increments the visit count of ``state``,
        picks the child with the highest exploration heuristic, recurses into
        it with the other player to move, and then moves ``state``'s value
        towards the returned outcome (or, when ``self.bootstrap`` is set,
        towards the average of the outcome and the chosen child's value).

        Child entries are registered in the table *before* recursing, so the
        parent and the recursive call share the same ``[visits, value]`` list.
        Previously the parent wrote its pre-recursion snapshot back afterwards,
        which replaced the freshly updated entry of a newly discovered child
        with ``[0, 0]`` and lost that child's first visit.

        Args:
            state: position to descend from; must provide ``number()``,
                ``outcome()``, ``moves()``, ``clone()`` and ``setHexIndex()``.
            player: the player who places the next stone in ``state``.

        Returns:
            1 if the reached terminal position is won by ``self.playerNumber``,
            otherwise -1.
        """
        table = self.stateInfo

        # Fetch (or create and register) this state's [visits, value] entry.
        info = table.setdefault(state.number(), [0, 0])
        info[0] += 1

        # Terminal position: record the result and stop descending.
        result = state.outcome()
        if result != 0:
            v = 1 if result == self.playerNumber else -1
            info[1] = v
            return v

        # Generate every child position together with its shared table entry.
        states = []
        childInfo = []
        for m in state.moves():
            s = state.clone()
            s.setHexIndex(m, player)
            states.append(s)
            childInfo.append(table.setdefault(s.number(), [0, 0]))

        # Child values are stored from self.playerNumber's point of view, so
        # they are negated when the other player is choosing.
        sgn = 1 if player == self.playerNumber else -1
        logVisits = math.log(info[0])
        heuristic = []
        for visits, value in childInfo:
            bonus = 3.0 if self.maximizeNonVisited and visits == 0 else 0
            h = sgn * value + self.expConst * math.sqrt(
                logVisits / (visits + 1)) + bonus
            heuristic.append(h)

        # Follow the most promising child.
        bestIndex = argmax(heuristic)
        outcome = self.rollout(states[bestIndex], nextPlayer(player))

        # Incremental mean update of this state's value.
        if self.bootstrap:
            est = childInfo[bestIndex][1]  # already updated by the recursion
            target = outcome * 0.5 + est * 0.5
        else:
            target = outcome
        info[1] = info[1] + (1.0 / info[0]) * (target - info[1])

        return outcome
Пример #3
0
    def expandChildren(self):
        """Create the child nodes of this node, one per available move.

        Does nothing when ``self.children`` has already been set. Otherwise
        ``self.children`` becomes a list holding, for every move returned by
        ``self.state.moves()``, a ``Node`` whose state is a clone of
        ``self.state`` with that move played by ``self.player`` and whose
        player is ``nextPlayer(self.player)``.
        """
        if self.children is not None:
            return

        self.children = []
        opponent = nextPlayer(self.player)
        for move in self.state.moves():
            successor = self.state.clone()
            successor.setHexIndex(move, self.player)
            self.children.append(Node(successor, opponent))
Пример #4
0
    def rollout(self, node, expand):
        """Run one tree descent from ``node`` and update values on the way back.

        Args:
            node: tree node to descend from. Its ``visits`` counter is
                incremented and its ``value`` is updated.
            expand: when true, the first node on the path that has no children
                yet gets them generated; the flag is then cleared so at most
                one node is expanded per descent.

        Returns:
            A two-element list ``[v, discount]``. ``v`` is 1 (or 1.0) when the
            position finally reached is won by ``self.playerNumber`` and -1
            otherwise; ``discount`` is a power of ``self.gamma`` that the
            caller multiplies with ``v`` for its own value update.
        """
        if expand and node.children is None:
            node.expandChildren()
            expand = False

        node.visits += 1

        # Terminal node: its value is simply the game result.
        result = node.state.outcome()
        if result != 0:
            v = 1 if result == self.playerNumber else -1
            node.value = v
            return [v, self.gamma]

        # NOTE(review): the result of moves() is never used; the call is kept
        # only so that behaviour stays unchanged -- confirm it has no side
        # effects and remove it.
        node.state.moves()
        kids = node.children

        if kids is None:
            # Leaf that was not expanded: play random moves to the end.
            mover = node.player
            sim = node.state.clone()
            discount = 1
            while sim.outcome() == 0:
                sim.randomMove(mover)
                mover = nextPlayer(mover)
                discount *= self.gamma
            v = 1.0 if sim.outcome() == self.playerNumber else -1
            # Incremental mean of the discounted results seen at this node.
            node.value = node.value + (1.0 / node.visits) * (
                v * discount - node.value)
            return [v, discount * self.gamma]

        # Values are stored from self.playerNumber's point of view, so they
        # are negated when the other player chooses at this node.
        sign = 1 if node.player == self.playerNumber else -1
        scores = [
            sign * child.value
            + self.expConst * math.sqrt(
                math.log(node.visits) / (child.visits + 1))
            + (3 if self.maximizeNonVisited and child.visits == 0 else 0)
            for child in kids
        ]

        chosen = kids[argmax(scores)]
        v, discount = self.rollout(chosen, expand)

        node.value = node.value + (1.0 / node.visits) * (
            discount * v - node.value)
        return [v, discount * self.gamma]
Пример #5
0
    def expandChildren(self):
        """Create this node's children, reusing nodes from the owner's table.

        Does nothing when ``self.children`` has already been set. For every
        move from ``self.state.moves()`` the number of the resulting position
        is computed with ``numberAfterMove`` and looked up through
        ``self.owner.getTableNode``. A node found there is reused as the
        child; otherwise a new ``Node`` is built from a clone of
        ``self.state`` with the move played by ``self.player``, appended, and
        stored through ``self.owner.saveTableNode``.
        """
        if self.children is not None:
            return

        self.children = []
        owner = self.owner
        opponent = nextPlayer(self.player)
        currentNumber = self.state.number()
        cellCount = self.state.bdist * self.state.wdist

        for move in self.state.moves():
            code = numberAfterMove(currentNumber, cellCount, move, self.player)
            child = owner.getTableNode(code)  # transposition-table lookup
            isNew = child is None
            if isNew:
                successor = self.state.clone()
                successor.setHexIndex(move, self.player)
                child = Node(successor, opponent, owner)
            self.children.append(child)
            if isNew:
                owner.saveTableNode(child)  # remember it for later lookups
Пример #6
0
    def expandChildren(self):
        """Create this node's children, reusing nodes from the global table.

        Does nothing when ``self.children`` has already been set. For every
        move from ``self.state.moves()`` the number of the resulting position
        is computed with ``numberAfterMove`` and looked up with
        ``getTableNode``. A node found there is reused as the child; otherwise
        a new ``Node`` is built from a clone of ``self.state`` with the move
        played by ``self.player``, appended, and stored with ``saveTableNode``
        under that number.
        """
        if self.children is not None:
            return

        self.children = []
        moves = self.state.moves()
        nextToMove = nextPlayer(self.player)
        # self.state is never modified here (only its clones are), so its
        # number is computed once instead of once per move.
        stateNum = self.state.number()
        tiles = self.state.bdist * self.state.wdist
        for m in moves:
            nextNum = numberAfterMove(stateNum, tiles, m, self.player)
            n = getTableNode(nextNum)  # transposition-table lookup
            if n is not None:
                self.children.append(n)
                continue
            s = self.state.clone()
            s.setHexIndex(m, self.player)
            n = Node(s, nextToMove)
            self.children.append(n)
            saveTableNode(nextNum, n)  # remember it for later lookups
Пример #7
0
    def rollout(self, node, expand):
        """Run one tree descent from ``node``, marking solved nodes on the way.

        A node whose ``winner`` attribute is truthy is treated as solved and
        is not searched again. Terminal nodes are marked solved directly; an
        inner node becomes solved when one child is won by the player to move
        at the node, or when every child is solved.

        Args:
            node: tree node to descend from.
            expand: when true, the first node on the path that has no children
                yet gets them generated; the flag is then cleared so at most
                one node is expanded per descent.

        Returns:
            A two-element list ``[v, discount]``. ``v`` is the result from
            ``self.player``'s point of view (positive for a win, negative for
            a loss) and ``discount`` is the factor the caller multiplies with
            ``v`` for its own value update.
        """
        if node.winner:  # already solved, nothing left to search
            return [node.value, self.gamma]

        if expand and node.children is None:
            node.expandChildren()
            expand = False

        node.visits += 1

        # Terminal node: mark it solved with the game result.
        result = node.state.outcome()
        if result != 0:
            v = 1 if result == self.player else -1
            node.winner = result
            # A large visit count makes multi-threaded players weight this
            # node heavily.
            node.visits = largeNumber
            node.value = v
            return [v, 1.0]

        # NOTE(review): the result of moves() is never used; the call is kept
        # only so that behaviour stays unchanged -- confirm it has no side
        # effects and remove it.
        node.state.moves()
        kids = node.children

        if kids is None:
            # Leaf that was not expanded: play random moves to the end.
            mover = node.player
            sim = node.state.clone()
            discount = 1
            while sim.outcome() == 0:
                sim.randomMove(mover)
                mover = nextPlayer(mover)
                discount *= self.gamma
            v = 1.0 if sim.outcome() == self.player else -1
            # Incremental mean of the discounted results seen at this node.
            node.value = node.value + (1.0 / node.visits) * (
                v * discount - node.value)
            return [v, discount]

        # Values are stored from self.player's point of view, so they are
        # negated when the other player chooses at this node.
        sign = 1 if node.player == self.player else -1
        scores = [
            sign * child.value
            + self.expConst * math.sqrt(
                math.log(node.visits) / (child.visits + 1))
            + (3 if self.maximizeNonVisited and child.visits == 0 else 0)
            for child in kids
        ]

        chosen = kids[argmax(scores)]
        v, discount = self.rollout(chosen, expand)

        # The player to move here wins as soon as one child is won by them.
        winning = next((c for c in kids if c.winner == node.player), None)
        if winning is not None:
            node.winner = node.player
            node.visits = largeNumber
            node.value = winning.value
            return [node.value, 1.0]

        # Every child solved and none won by the mover: this node is lost.
        # NOTE(review): unlike the other solved branches, visits is not set
        # to largeNumber here -- confirm whether that is intended.
        if all(c.winner for c in kids):
            node.winner = nextPlayer(node.player)
            node.value = 1.0 if node.winner == self.player else -1.0
            return [node.value, 1.0]

        node.value = node.value + (1.0 / node.visits) * (
            discount * v - node.value)
        return [v, discount * self.gamma]