def search(self, state, color, depth, subtree):
    """One playout of Thompson-sampling tree search; returns the winner's color.

    path: if you choose 19 from enable=[13,19,20,21] (choose enable[1])
    and then opponent choose 32 from enable=[14,24,32,53], (enable[2])
    the path is [1,2]

    Args:
        state: board array, mutated in place as stones are placed.
        color: side to move at this node (opponent is ``color ^ 1``).
        depth: current ply; once it exceeds ``self.depth`` the playout
            switches to pure random simulation.
        subtree: list of ``[wins, loses, children]`` nodes, one per legal
            move at this node; mutated in place (stats updated, children
            lazily created on first visit).

    Returns:
        The color returned by ``self.simulate`` — presumably the winner
        of the finished game (TODO confirm against simulate()).
    """
    enable = reversi.getPossiblePoints(state, color)
    if depth == self.depth + 1:
        # Past the tree horizon: no thinking (simulate a random playout).
        if len(enable) == 0:
            # 65 encodes a pass move.
            return self.simulate(state, 65, color)
        row, line = random.choice(enable)
        return self.simulate(state, row * 8 + line, color)
    if len(enable) == 0:
        # Pass: opponent moves next. NOTE(review): the same subtree is
        # reused, so its nodes now describe the opponent's moves.
        return self.search(state, color ^ 1, depth + 1, subtree)
    if len(subtree) == 0:
        # First visit: one fresh [wins, loses, children] node per move,
        # aligned index-for-index with `enable`.
        subtree.extend([[0, 0, []] for _ in enable])
    wins = np.array([node[0] for node in subtree])
    loses = np.array([node[1] for node in subtree])
    # Thompson sampling: draw from Beta(wins+1, loses+1) per move and
    # greedily take the largest sample.
    values = beta.rvs(wins + 1, loses + 1)
    choice = values.argmax()
    row, line = enable[choice]
    reversi.putStone(state, row, line, color)
    r = self.search(state, color ^ 1, depth + 1, subtree[choice][2])
    # Back-propagate the playout result into this node's stats.
    if r == color:
        subtree[choice][0] += 1
    else:
        subtree[choice][1] += 1
    return r
def play(self, state):
    """Choose a legal move uniformly at random.

    Returns ``row * 8 + line`` for the chosen square, or 65 (the pass
    sentinel) when no legal move exists.
    """
    candidates = reversi.getPossiblePoints(state, self.color)
    if not candidates:
        return 65
    row, line = random.choice(candidates)
    return row * 8 + line
def act(self, state):
    """Pick a move: exhaustive search in the endgame, otherwise delegate.

    When at most ``self.allSearchDepth`` empty squares (encoded as -1)
    remain, run a full minimax via ``allSearch`` and take the move with
    the best stone differential; earlier in the game fall back to
    ``self.play``. Returns ``row * 8 + line``, or 65 for a pass.
    """
    empties = len(np.where(state == -1)[0])
    if empties > self.allSearchDepth:
        # Too early for exhaustive search — use the default policy.
        return self.play(state)
    moves = reversi.getPossiblePoints(state, self.color)
    if not moves:
        return 65
    scores = allSearch(state, self.color, moves, 0, self.allSearchDepth)
    row, line = moves[int(np.argmax(scores))]
    return row * 8 + line
def allSearch(state, color, enables, nowDepth, maxDepth):
    """全探索 — exhaustive minimax over the remaining moves.

    Args:
        state: board array (never mutated; per-move copies are made).
        color: side to move; opponent is ``color ^ 1``.
        enables: list of legal (row, line) moves for ``color``.
        nowDepth: current search depth.
        maxDepth: depth at which the position is scored as-is.

    Returns:
        A list of stone-count differentials (color's stones minus the
        opponent's), one per move in ``enables`` — i.e. the value of
        each move from ``color``'s point of view. On a pass or a
        finished game the list has a single entry.
    """
    if nowDepth >= maxDepth:
        # Depth limit reached: score the position as it stands.
        return [
            len(np.where(state == color)[0])
            - len(np.where(state == color ^ 1)[0])
        ]
    myWins = []
    if len(enables) == 0:
        # Pass.
        opponentEnables = reversi.getPossiblePoints(state, color ^ 1)
        if len(opponentEnables) == 0:
            # Neither side can move: game over, score the final board.
            myWins.append(
                len(np.where(state == color)[0])
                - len(np.where(state == color ^ 1)[0]))
        else:
            opponentWins = allSearch(state, color ^ 1, opponentEnables,
                                     nowDepth + 1, maxDepth)
            # BUG FIX: a rational opponent maximizes *their* score, so our
            # value is -max(opponentWins). The original used min(), i.e.
            # assumed the opponent plays their worst move.
            myWins.append(-max(opponentWins))
    else:
        for row, line in enables:
            # BUG FIX: copy the board per candidate move. The original
            # made one deepcopy before the loop, so stones from earlier
            # iterations accumulated and later moves were evaluated on a
            # corrupted position.
            board = deepcopy(state)
            reversi.putStone(board, row, line, color)
            opponentEnables = reversi.getPossiblePoints(board, color ^ 1)
            opponentWins = allSearch(board, color ^ 1, opponentEnables,
                                     nowDepth + 1, maxDepth)
            # Negamax: our value is the negation of the opponent's best.
            myWins.append(-max(opponentWins))
    return myWins
def play(self, state):
    """Run ``maxSize`` Thompson-sampling playouts and pick the best move.

    The move whose Beta(wins+1, loses+1) posterior has the highest
    median win rate is returned as ``row * 8 + line``; 65 means pass.
    A sole legal move is returned immediately without searching.
    """
    moves = reversi.getPossiblePoints(state, self.color)
    if not moves:
        return 65
    if len(moves) == 1:
        row, line = moves[0]
        return row * 8 + line
    # Fresh tree for this turn; search() fills it with [wins, loses,
    # children] stats, one root node per legal move.
    self.tree = []
    for _ in range(self.maxSize):
        self.search(deepcopy(state), self.color, 1, self.tree)
    wins = np.array([node[0] for node in self.tree])
    loses = np.array([node[1] for node in self.tree])
    best = beta.median(wins + 1, loses + 1).argmax()
    row, line = moves[best]
    return row * 8 + line
def play(self, state):
    """Pure Monte-Carlo move selection.

    For every legal move, run ``self.size`` random playouts from a copy
    of the board and count wins for this player; return the move
    (``row * 8 + line``) with the most wins, first maximum on ties.
    Returns None when there is a legal move list but playouts never ran
    (size <= 0 keeps the first move's 0 wins), and 65 on a pass.
    """
    moves = reversi.getPossiblePoints(state, self.color)
    if not moves:
        return 65
    bestWins = -1
    bestPoint = None
    for row, line in moves:
        point = row * 8 + line
        # Each playout gets its own board copy; count our victories.
        wins = sum(
            1
            for _ in range(self.size)
            if self.simulate(deepcopy(state), point, self.color) == self.color
        )
        # Strict > keeps the earliest move on ties, as before.
        if wins > bestWins:
            bestWins = wins
            bestPoint = point
    return bestPoint
def act(self, state):
    """Prompt a human player for a move on the terminal.

    Uses ANSI escape sequences to keep the prompt pinned in place.
    Returns ``row * 8 + line`` once the input parses as two integers in
    ``row,line`` form with both in 0..7, or 65 (pass) when the player
    has no legal move. Loops until valid input is given.
    """
    print("\033[K\n" * 2 + "\033[2A", end="")
    if len(reversi.getPossiblePoints(state, self.color)) == 0:
        # No legal move: pass turn automatically.
        print("\rplayer: pass\033[K", end="")
        return 65
    while True:
        print("\r\033[K>>> ", end="")
        query = input()
        try:
            row, line = map(int, query.split(","))
        except ValueError:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. Malformed input (non-integer
            # parts or the wrong number of fields) raises ValueError only.
            print("input row, line (0-7)\033[1A", end="")
            continue
        if 0 <= row < 8 and 0 <= line < 8:
            # valid query — move cursor back up and return the square.
            print("\033[1A", end="")
            return row * 8 + line
        print("input row, line (0-7)\033[1A", end="")