示例#1
0
    def search(self, state, color, depth, subtree):
        """
        One Monte-Carlo tree-search playout step using Thompson sampling.

        Descends the stored tree: for each visited node a value is drawn
        from Beta(wins + 1, loses + 1) per child and the child with the
        largest sample is followed. Once depth passes self.depth, the game
        is finished with a random rollout via self.simulate().

        path: if you choose 19 from enable=[13,19,20,21] (choose enable[1])
            and then opponent choose 32 from enable=[14,24,32,53], (enable[2])
            the path is [1,2]

        Args:
            state: board array, mutated in place by reversi.putStone.
            color: player to move this turn (opponent is color ^ 1).
            depth: current tree depth (the root caller passes 1).
            subtree: list of [wins, loses, children] entries, one per legal
                move in `enable` order, created lazily on first visit.

        Returns:
            the rollout result from self.simulate (compared against `color`
            to update win/lose counts on the way back up).
        """
        enable = reversi.getPossiblePoints(state, color)
        if depth == self.depth + 1:
            # no thinking (simulate): past the tree horizon, finish the
            # game with a random playout instead of expanding nodes.
            if len(enable) == 0:
                return self.simulate(state, 65, color)  # 65 encodes a pass
            row, line = random.choice(enable)
            return self.simulate(state, row * 8 + line, color)

        if len(enable) == 0:
            # Forced pass: opponent moves from the SAME subtree node
            # (no edge is consumed, no stats are recorded for the pass).
            return self.search(state, color ^ 1, depth + 1, subtree)

        if len(subtree) == 0:
            # first visit: one [wins, loses, children] record per move
            subtree.extend([[0, 0, []] for _ in enable])

        # Thompson sampling: draw one value from each child's Beta
        # posterior and follow the child with the largest draw.
        wins = np.array([node[0] for node in subtree])
        loses = np.array([node[1] for node in subtree])
        values = beta.rvs(wins + 1, loses + 1)
        choice = values.argmax()
        row, line = enable[choice]
        reversi.putStone(state, row, line, color)
        r = self.search(state, color ^ 1, depth + 1, subtree[choice][2])
        # Back up the playout result along the chosen edge.
        if r == color:
            subtree[choice][0] += 1
        else:
            subtree[choice][1] += 1
        return r
示例#2
0
 def play(self, state):
     """Play a uniformly random legal move; return 65 (pass) when none exists."""
     moves = reversi.getPossiblePoints(state, self.color)
     if not moves:
         return 65
     row, line = random.choice(moves)
     return row * 8 + line
示例#3
0
 def act(self, state):
     """Switch to exhaustive search once few empty squares remain, else play()."""
     # -1 marks an empty square on the board.
     empties = len(np.where(state == -1)[0])
     if empties > self.allSearchDepth:
         # Too early for full search: fall back to the default policy.
         return self.play(state)
     enables = reversi.getPossiblePoints(state, self.color)
     if len(enables) == 0:
         return 65  # no legal move: pass
     scores = allSearch(state, self.color, enables, 0,
                        self.allSearchDepth)
     row, line = enables[np.argmax(scores)]
     return row * 8 + line
示例#4
0
def allSearch(state, color, enables, nowDepth, maxDepth):
    """
    Exhaustive (negamax-style) game-tree search.

    Args:
        state: board array; cells hold a player color, -1 assumed to mean
            empty — TODO confirm against reversi module.
        color: player to move (opponent is color ^ 1).
        enables: legal (row, line) moves for `color` on `state`.
        nowDepth: current recursion depth.
        maxDepth: depth at which the search is cut off.

    Returns:
        A list of stone-difference scores from `color`'s point of view,
        one per move in `enables` (a single entry when passing or at the
        depth cutoff).
    """
    if nowDepth >= maxDepth:
        # Cutoff: evaluate the position as the raw stone difference.
        return [
            len(np.where(state == color)[0]) -
            len(np.where(state == color ^ 1)[0])
        ]

    myWins = []
    if len(enables) == 0:
        # No legal move: pass.
        opponentEnables = reversi.getPossiblePoints(state, color ^ 1)
        if len(opponentEnables) == 0:
            # Neither side can move: game over, score the final board.
            myWins.append(
                len(np.where(state == color)[0]) -
                len(np.where(state == color ^ 1)[0]))
        else:
            opponentWins = allSearch(state, color ^ 1, opponentEnables,
                                     nowDepth + 1, maxDepth)
            # Opponent picks their best (our worst); negate for our view.
            mybest = min(opponentWins)
            myWins.append(-mybest)
    else:
        for row, line in enables:
            # BUG FIX: copy the board per candidate move. The original
            # deep-copied once outside the loop, so stones placed while
            # evaluating earlier moves leaked into later evaluations.
            board = deepcopy(state)
            reversi.putStone(board, row, line, color)
            opponentEnables = reversi.getPossiblePoints(board, color ^ 1)
            opponentWins = allSearch(board, color ^ 1, opponentEnables,
                                     nowDepth + 1, maxDepth)
            mybest = min(opponentWins)
            myWins.append(-mybest)
    return myWins
示例#5
0
 def play(self, state):
     """Run self.maxSize MCTS iterations; play the move with the best Beta median."""
     moves = reversi.getPossiblePoints(state, self.color)
     if len(moves) == 0:
         return 65  # pass
     if len(moves) == 1:
         # Forced move: no point searching.
         row, line = moves[0]
         return row * 8 + line
     self.tree = []
     for _ in range(self.maxSize):
         # search() mutates the board, so give it a fresh copy each time.
         self.search(deepcopy(state), self.color, 1, self.tree)
     winCounts = np.array([node[0] for node in self.tree])
     loseCounts = np.array([node[1] for node in self.tree])
     # Rank moves by the median of their Beta(wins+1, loses+1) posterior.
     best = beta.median(winCounts + 1, loseCounts + 1).argmax()
     row, line = moves[best]
     return row * 8 + line
示例#6
0
 def play(self, state):
     """Flat Monte Carlo: simulate each legal move self.size times, keep the best."""
     moves = reversi.getPossiblePoints(state, self.color)
     if len(moves) == 0:
         return 65  # pass
     bestWins = -1
     bestPoint = None
     for row, line in moves:
         point = row * 8 + line
         wins = 0
         for _ in range(self.size):
             # simulate() mutates the board: hand it a fresh copy.
             winner = self.simulate(deepcopy(state), point, self.color)
             if winner == self.color:
                 wins += 1
         if bestWins == -1 or bestWins < wins:
             bestWins = wins
             bestPoint = point
     return bestPoint
示例#7
0
    def act(self, state):
        """
        Read a human player's move from stdin.

        Returns 65 (pass) when no legal move exists; otherwise loops until
        the user enters a valid "row,line" pair and returns row * 8 + line.
        Note: only the 0-7 range is checked here, not board legality.
        """
        # Reserve/clear two terminal lines below the cursor, then move
        # the cursor back up (ANSI escape sequences).
        print("\033[K\n" * 2 + "\033[2A", end="")
        if len(reversi.getPossiblePoints(state, self.color)) == 0:
            # pass turn
            print("\rplayer: pass\033[K", end="")
            return 65
        while True:
            print("\r\033[K>>> ", end="")
            query = input()
            try:
                row, line = map(int, query.split(","))
            except ValueError:
                # BUG FIX: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit during input. ValueError
                # covers both non-integer tokens and a wrong token count.
                print("input row, line (0-7)\033[1A", end="")
                continue

            if 0 <= row < 8 and 0 <= line < 8:
                # valid query
                print("\033[1A", end="")
                return row * 8 + line
            else:
                print("input row, line (0-7)\033[1A", end="")