def deadPattern1(self, tempBoard, shortcutState):
    """Fill empty cells that match the first dead-cell pattern.

    For every occupied index ``s`` from 4 upward the pattern requires:
    the ``"u"`` neighbour of ``s`` holds the same colour as ``s``; the
    ``"ur"`` neighbour of ``s`` (call it ``m``) is empty; and both the
    ``"dr"`` neighbour of ``m`` and that cell's own ``"u"`` neighbour hold
    ``nextPlayer`` of that colour.  A ``dirAdj`` result of ``-1`` fails
    the match.

    Each matching ``m`` is set to ``nextPlayer(self.player)`` in
    ``tempBoard``, in ``self.deadCells`` and, via ``setHexIndex``, in
    ``shortcutState``.

    Returns:
        True if at least one cell was filled, otherwise False.
    """

    def holds(index, colour):
        # True only when the neighbour exists (not -1) and carries ``colour``.
        return index != -1 and tempBoard[index] == colour

    changed = False
    # NOTE(review): indices 0-3 are never scanned; presumably they are
    # reserved (e.g. border) entries -- confirm against the board layout.
    for origin in range(4, len(tempBoard)):
        own = tempBoard[origin]
        if own == 0:
            continue
        rival = nextPlayer(own)

        if not holds(self.state.dirAdj(origin, "u"), own):
            continue

        gap = self.state.dirAdj(origin, "ur")
        if not holds(gap, 0):
            continue

        flank = self.state.dirAdj(gap, "dr")
        if not holds(flank, rival):
            continue

        if not holds(self.state.dirAdj(flank, "u"), rival):
            continue

        # Pattern matched: record the fill in all three places.
        filler = nextPlayer(self.player)
        tempBoard[gap] = filler
        self.deadCells[gap] = filler
        shortcutState.setHexIndex(gap, filler)
        changed = True

    return changed
def rollout(self, state, player):
    """Run one search iteration from ``state`` and update the statistics.

    ``self.stateInfo`` maps ``state.number()`` to a two-element list
    ``[visits, value]``.  The visit count of ``state`` is incremented; if
    the state is terminal its value is set to the result, otherwise the
    best-scoring successor is searched recursively and the value of
    ``state`` is moved towards the returned outcome as a running average.

    Args:
        state: position to search from; must provide ``number()``,
            ``outcome()``, ``moves()``, ``clone()`` and ``setHexIndex()``.
        player: the player who moves in ``state``.

    Returns:
        1 if the iteration ended in a position whose outcome equals
        ``self.playerNumber``, otherwise -1.
    """
    table = self.stateInfo
    key = state.number()

    info = table.get(key)
    if info is None:
        info = [0, 0]
    info[0] += 1

    # Terminal position: the value is the exact result.
    result = state.outcome()
    if result != 0:
        value = 1 if result == self.playerNumber else -1
        info[1] = value
        table[key] = info
        return value

    # Generate every successor together with its statistics record.
    successors = []
    child_keys = []
    child_info = []
    for move in state.moves():
        successor = state.clone()
        successor.setHexIndex(move, player)
        child_key = successor.number()
        successors.append(successor)
        child_keys.append(child_key)
        # setdefault registers unseen children right away, so the list
        # scored below is the very object the recursive call updates.
        # Writing a separate placeholder back after the recursion would
        # erase the selected child's freshly recorded visit.
        child_info.append(table.setdefault(child_key, [0, 0]))

    # Score the successors from the point of view of the player to move.
    sign = 1 if player == self.playerNumber else -1
    log_visits = math.log(info[0])  # info[0] >= 1 after the increment above
    scores = []
    for visits, value in child_info:
        bonus = 3.0 if self.maximizeNonVisited and visits == 0 else 0
        exploration = self.expConst * math.sqrt(log_visits / (visits + 1))
        scores.append(sign * value + exploration + bonus)

    best = argmax(scores)
    outcome = self.rollout(successors[best], nextPlayer(player))

    if self.bootstrap:
        # Blend the sampled outcome with the child's current estimate.
        target = outcome * 0.5 + table[child_keys[best]][1] * 0.5
    else:
        target = outcome
    info[1] = info[1] + (1.0 / info[0]) * (target - info[1])
    table[key] = info
    return outcome
def expandChildren(self):
    """Create one child node per available move, unless already expanded.

    Does nothing when ``self.children`` is already set.  Otherwise every
    move from ``self.state.moves()`` is played for ``self.player`` on a
    fresh clone of the state, and the clone is wrapped in a ``Node`` whose
    player is ``nextPlayer(self.player)``.
    """
    if self.children is not None:
        return

    # Publish the (initially empty) list first, then fill it in place.
    offspring = self.children = []
    candidates = self.state.moves()
    opponent = nextPlayer(self.player)
    for move in candidates:
        successor = self.state.clone()
        successor.setHexIndex(move, self.player)
        offspring.append(Node(successor, opponent))
def rollout(self, node, expand):
    """Run one search iteration from ``node`` and back up the result.

    The node's visit count is incremented, then one of three things
    happens:

    * terminal state: the node's value is set to the result;
    * no children (and nothing left to expand): a random playout is run
      from a clone of the state and the node's value is moved towards the
      discounted result;
    * otherwise: the best-scoring child is searched recursively and the
      node's value is moved towards the discounted result it returns.

    Args:
        node: tree node to search from.
        expand: when true, the first node met without children is
            expanded; at most one node is expanded per iteration.

    Returns:
        A two-element list ``[v, discount]``.  ``v`` is 1 when the final
        outcome equals ``self.playerNumber`` and -1 otherwise;
        ``discount`` is the factor the caller applies to ``v`` when
        updating its own value.
    """
    if node.children is None and expand:
        node.expandChildren()
        expand = False

    node.visits += 1

    # Terminal node: exact value, nothing to search.
    result = node.state.outcome()
    if result != 0:
        value = 1 if result == self.playerNumber else -1
        node.value = value
        return [value, self.gamma]

    children = node.children
    if children is None:
        # Leaf without children: estimate the value with a random playout.
        mover = node.player
        playout = node.state.clone()
        discount = 1
        while playout.outcome() == 0:
            playout.randomMove(mover)
            mover = nextPlayer(mover)
            discount *= self.gamma
        value = 1.0 if playout.outcome() == self.playerNumber else -1
        node.value = node.value + (1.0 / node.visits) * (
            value * discount - node.value)
        return [value, discount * self.gamma]

    # Score the children from the point of view of the player to move.
    sign = 1 if node.player == self.playerNumber else -1
    log_visits = math.log(node.visits)
    scores = []
    for child in children:
        bonus = 3 if self.maximizeNonVisited and child.visits == 0 else 0
        exploration = self.expConst * math.sqrt(
            log_visits / (child.visits + 1))
        scores.append(sign * child.value + exploration + bonus)

    best_child = children[argmax(scores)]
    value, discount = self.rollout(best_child, expand)
    node.value = node.value + (1.0 / node.visits) * (
        discount * value - node.value)
    return [value, discount * self.gamma]
def expandChildren(self):
    """Create or reuse one child node per available move.

    Does nothing when ``self.children`` is already set.  For every move
    the number of the resulting position is computed with
    ``numberAfterMove`` and looked up through ``self.owner.getTableNode``.
    A node found there is reused; otherwise the move is played on a clone
    of the state, a new ``Node`` is built for
    ``nextPlayer(self.player)``, and it is registered with
    ``self.owner.saveTableNode``.
    """
    if self.children is not None:
        return

    self.children = []
    candidates = self.state.moves()
    opponent = nextPlayer(self.player)
    base_number = self.state.number()
    tile_count = self.state.bdist * self.state.wdist

    for move in candidates:
        key = numberAfterMove(base_number, tile_count, move, self.player)
        child = self.owner.getTableNode(key)  # transposition-table lookup
        if child is None:
            # Position not seen before: build it and store it in the table.
            successor = self.state.clone()
            successor.setHexIndex(move, self.player)
            child = Node(successor, opponent, self.owner)
            self.children.append(child)
            self.owner.saveTableNode(child)
        else:
            self.children.append(child)
def expandChildren(self):
    """Create or reuse one child node per available move.

    Does nothing when ``self.children`` is already set.  For every move
    the number of the resulting position is computed with
    ``numberAfterMove`` and looked up with ``getTableNode``.  A node found
    there is reused; otherwise the move is played on a clone of the
    state, a new ``Node`` is built for ``nextPlayer(self.player)``, and it
    is stored under that number with ``saveTableNode``.
    """
    if self.children is not None:
        return

    self.children = []
    candidates = self.state.moves()
    opponent = nextPlayer(self.player)
    # The state is not modified below (only clones are), so its number
    # and the tile count are computed once instead of once per move.
    base_number = self.state.number()
    tile_count = self.state.bdist * self.state.wdist

    for move in candidates:
        key = numberAfterMove(base_number, tile_count, move, self.player)
        child = getTableNode(key)
        if child is None:
            # Position not seen before: build it and store it in the table.
            successor = self.state.clone()
            successor.setHexIndex(move, self.player)
            child = Node(successor, opponent)
            self.children.append(child)
            saveTableNode(key, child)
        else:
            self.children.append(child)
def rollout(self, node, expand):
    """Run one search iteration from ``node``, marking solved nodes.

    A node whose ``winner`` is set is treated as solved and its stored
    value is returned immediately.  Otherwise the visit count is
    incremented and one of the following happens:

    * terminal state: the node is marked solved with the exact result;
    * no children (and nothing left to expand): a random playout is run
      from a clone of the state and the node's value is moved towards the
      discounted result;
    * otherwise: the best-scoring child is searched recursively, then the
      children are inspected -- if any child is won by ``node.player``
      the node is solved as a win for that player; if every child is
      solved, the node is solved as a win for the other player; failing
      both, the node's value is moved towards the discounted result.

    Args:
        node: tree node to search from.
        expand: when true, the first node met without children is
            expanded; at most one node is expanded per iteration.

    Returns:
        A two-element list ``[v, discount]``: the value to back up (1 or
        -1 relative to ``self.player`` for sampled results, the stored
        value for solved nodes) and the factor the caller applies to it.
    """
    if node.winner:  # already solved: reuse the stored value
        # NOTE(review): this returns self.gamma while the other solved
        # returns below use 1.0 -- confirm the difference is intended.
        return [node.value, self.gamma]

    if node.children is None and expand:
        node.expandChildren()
        expand = False

    node.visits += 1

    # Terminal node: exact value, mark as solved.
    result = node.state.outcome()
    if result != 0:
        value = 1 if result == self.player else -1
        node.winner = result
        # A large visit count makes multi-threaded players weight this
        # solved node heavily.
        node.visits = largeNumber
        node.value = value
        return [value, 1.0]

    children = node.children
    if children is None:
        # Leaf without children: estimate the value with a random playout.
        mover = node.player
        playout = node.state.clone()
        discount = 1
        while playout.outcome() == 0:
            playout.randomMove(mover)
            mover = nextPlayer(mover)
            discount *= self.gamma
        value = 1.0 if playout.outcome() == self.player else -1
        node.value = node.value + (1.0 / node.visits) * (
            value * discount - node.value)
        return [value, discount]

    # Score the children from the point of view of the player to move.
    sign = 1 if node.player == self.player else -1
    log_visits = math.log(node.visits)
    scores = []
    for child in children:
        bonus = 3 if self.maximizeNonVisited and child.visits == 0 else 0
        exploration = self.expConst * math.sqrt(
            log_visits / (child.visits + 1))
        scores.append(sign * child.value + exploration + bonus)

    best_child = children[argmax(scores)]
    value, discount = self.rollout(best_child, expand)

    # Propagate solved status from the children.
    all_solved = True
    for child in children:
        if child.winner == node.player:
            # The player to move can reach a position they win.
            node.winner = node.player
            node.visits = largeNumber
            node.value = child.value
            return [node.value, 1.0]
        if not child.winner:
            all_solved = False

    if all_solved:
        # Every move is solved and none wins for the mover: proven loss.
        # NOTE(review): unlike the two solved branches above, this one
        # leaves node.visits unchanged -- confirm that is intended.
        node.winner = nextPlayer(node.player)
        node.value = 1.0 if node.winner == self.player else -1.0
        return [node.value, 1.0]

    node.value = node.value + (1.0 / node.visits) * (
        discount * value - node.value)
    return [value, discount * self.gamma]