def __init__(self, board_size, state=None, playerTurn=None):
    """Build a fresh board of side ``board_size`` with ``playerTurn`` to move.

    ``state`` is accepted but not read by this constructor.

    Raises:
        NotImplementedError: if ``playerTurn`` is falsy (None, 0, ...).
    """
    if not playerTurn:
        raise NotImplementedError

    # Display symbols keyed by the string form of the player value.
    self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
    self.playerTurn = playerTurn
    self.board_size = board_size
    # Index one past the last board cell; used for the "pass" action.
    self.PASS_INDEX = self.board_size**2
    self.dragons = dict()
    self.z_table = set()
    self.zobrist = Zobrist(self.board_size)

    # positions[x][y] holds the Position object for that coordinate.
    grid = []
    for x in range(self.board_size):
        line = []
        for y in range(self.board_size):
            line.append(Position(x, y, self.board_size))
        grid.append(line)
    self.positions = grid

    self.action_space = self.set_action_space()
    # Per-player bookkeeping, keyed by the player values 1 and -1.
    self.captures = dict.fromkeys((1, -1), 0)
    self.passes = dict.fromkeys((1, -1), False)
    self.history = deque(maxlen=14)
    self.zhash_history = deque(maxlen=7)
    self._initialize_history()

    # Derived state. The order is kept as-is: _getScore reads self.value,
    # and the other helpers read attributes assigned above.
    self.binary = self._binary()
    self.id = self._convertStateToId()
    self.allowedActions = self._allowedActions()
    self.isEndGame = self._checkForEndGame()
    self.value = self._getValue()
    self.score = self._getScore()
    self.newState = None
def __init__(self, board):
    """Wrap a square ``board`` and build the score tables used by the search.

    ``board`` must support ``len(board)`` and ``len(board[0])``; a private
    NumPy copy is stored in ``self.board``.

    Raises:
        AssertionError: if the board is not square.
    """
    self.board = np.array(board).copy()
    self.height = len(board)
    self.width = len(board[0])
    assert self.width == self.height, "board is not square"
    self.size = self.width
    grid_shape = (self.height, self.width)

    # Lookup caches, all empty at construction time.
    self.AIScoreCache, self.oppScoreCache = {}, {}
    self.patternCacheCache = {}
    self.genCache = {}
    self.gen3Cache = {}  # used for the "onlyThree" variant

    self.steps, self.allSteps = [], []

    self.zobrist = Zobrist(self.size)
    self.zobrist.init()  # remember to re-initialize
    self._last = [False, False]  # record of the last step

    # Per-point scores for each side.
    self.AIScore = np.zeros(grid_shape)
    self.oppScore = np.zeros(grid_shape)

    # Used to control thinking time so the search does not overrun.
    self.startTime = None

    # Cache for self.hasNeighbor.
    self.neighborCache = {}

    # patternCache[X][row][col][Y] is the pattern of player X at (row, col)
    # in direction Y (0, 1, 2, 3 for '--', '|', '\', '/').
    # NOTE: the matching pointCache entry is indexed with [X - 1].
    self.patternCache = np.zeros((3,) + grid_shape + (4,), dtype='int64')

    self.initScore()
    # TODO: check the usage of the statistic table (currently disabled).

    self.attackRate = config.attackRate
def __init__(self, n, m):
    """Prepare the feature extractor, hash table and weight tables.

    ``n`` is used as the board side for the hash and the bias table; both
    ``n`` and ``m`` are forwarded to ``Feature``.
    """
    self.FT = Feature(n, m)
    self.Hash = Zobrist(n)

    # One power of ten per feature: 1, 10, 100, ...
    powers = []
    for exponent in range(self.FT.n_feature):
        powers.append(10**exponent)
    self.BASE = np.array(powers)
    self.WIN = self.BASE[-1] // 2

    # Positional bias: largest at the centre, falling off by one for every
    # step of row or column distance from it; stored flattened row by row.
    centre_gap = [abs(n // 2 - k) for k in range(n)]
    rows = [[n - centre_gap[i] - centre_gap[j] for i in range(n)]
            for j in range(n)]
    self.BIAS = np.array(rows).flatten()
def search(self, state):
    """Pick the next move for a board ``state`` of shape (N, N).

    An all-zero board is answered with the centre point. Otherwise an
    alpha-beta search produces the board after the chosen move, and the
    first cell that differs from ``state`` is returned as ``(row, col)``.
    """
    board_is_empty = not np.any(state)
    if board_is_empty:
        centre = self.N // 2
        return centre, centre

    # Start every search with a fresh hash table.
    self.Hash = Zobrist(self.N)
    result_state = self.alpha_beta(
        state, self.DEPTH, -self.IntMax, self.IntMax, 1)

    # Locate the cells that changed; the first one is the move.
    rows, cols = np.nonzero(state != result_state)
    return rows[0], cols[0]
class Board:
    """Five-in-a-row board with incrementally maintained point scores and an
    alpha-beta move search.

    For every point and each of four line directions the board keeps an
    integer "pattern" per side (``patternCache``). A pattern is looked up in
    the external ``pointCache`` table to obtain a score, and the per-point
    sums are stored in ``AIScore`` / ``oppScore``. ``put`` / ``remove`` keep
    the patterns, the scores and the Zobrist hash in sync with ``board``.

    The two sides are identified by ``R.AI`` and ``R.opp``; ``R.empty`` marks
    a free point. ``startTime`` must be set by the caller before a search is
    started, using the same clock as ``_now``.
    """

    # Unit steps (d_row, d_col) of the four scoring directions, in the order
    # used for the last index of patternCache: 0 '--', 1 '|', 2 '\', 3 '/'.
    _DIRECTIONS = ((0, 1), (1, 0), (1, 1), (1, -1))

    def __init__(self, board):
        """Copy ``board`` (a square 2-D sequence) and initialise all tables.

        Raises:
            AssertionError: if the board is not square.
        """
        self.board = np.array(board).copy()
        self.height = len(board)
        self.width = len(board[0])
        assert self.width == self.height, "board is not square"
        self.size = self.width

        self.scoreCache = np.zeros([2, 4, self.height, self.width])
        # Candidate-list caches used by gen(): the general one and the one
        # for onlyThrees=True.
        self.genCache = {}
        self.gen3Cache = {}
        self.steps = []
        self.allSteps = []
        self.zobrist = Zobrist(self.size)
        self.zobrist.init()  # remember to re-initialize
        self._last = [False, False]  # record of the last step

        # Per-point scores for each side.
        self.AIScore = np.zeros([self.height, self.width])
        self.oppScore = np.zeros([self.height, self.width])

        # Search start timestamp, used to stop before the time limit.
        self.startTime = None

        # Reserved as a cache for hasNeighbor.
        self.neighborCache = {}

        # patternCache[X][row][col][Y] is the pattern of player X at
        # (row, col) in direction Y. Index 0 is an unused placeholder so the
        # list can be indexed directly by R.AI / R.opp.
        # NOTE: the matching pointCache entry is indexed with [X - 1].
        self.patternCache = [
            [],
            [[[0, 0, 0, 0] for _ in range(self.width)]
             for _ in range(self.height)],
            [[[0, 0, 0, 0] for _ in range(self.width)]
             for _ in range(self.height)],
        ]

        self.initScore()
        # TODO: check the usage of the statistic table (currently disabled).

        self.attackRate = config.attackRate

    # ------------------------------------------------------------------
    # small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _now():
        """Return the current reading of the search clock.

        ``time.clock`` is used when the interpreter still has it, so that
        older deployments behave exactly as before; it was removed in
        Python 3.8, where ``time.perf_counter`` takes over.
        """
        clock = getattr(time, 'clock', None) or time.perf_counter
        return clock()

    def _timedOut(self):
        """True once more than config.timeLimit has passed since startTime."""
        return self._now() - self.startTime > config.timeLimit

    def _onBoard(self, x, y):
        """True if (x, y) is a valid (row, col) index."""
        return 0 <= x < self.height and 0 <= y < self.width

    def _firstWall(self, i, j, di, dj):
        """Return the first distance in 1..5 at which walking from (i, j) in
        steps of (di, dj) leaves the board, or None if it stays on the board.
        """
        for dd in range(1, 6):
            if not self._onBoard(i + dd * di, j + dd * dj):
                return dd
        return None

    def _addWall(self, i, j, direction, exponent):
        """Record a board edge in both sides' patterns at (i, j).

        The edge is entered as if the other side had a stone there.
        """
        self.patternCache[R.AI][i][j][direction] += R.opp * R.mm ** exponent
        self.patternCache[R.opp][i][j][direction] += R.AI * R.mm ** exponent

    def _cacheKey(self, player):
        """Key for the gen() caches: board hash plus the side to move."""
        return (self.zobrist.boardHashing[0], player)

    def _remember(self, result, onlyThrees, player):
        """Store ``result`` in the gen() cache and hand it back."""
        self.cache(result, onlyThrees, player)
        return result

    # ------------------------------------------------------------------
    # score maintenance
    # ------------------------------------------------------------------
    def initScore(self):
        """Reset the per-search dictionaries and build the initial patterns
        and scores from the stones already on the board.

        Every stone found on the board is also appended to ``allSteps``.
        """
        # TODO: check if this is equivalent to the p.item thing
        self.attack = {}
        self.score = {}
        self.role = {}
        self.scoreHum = {}
        self.scoreCom = {}

        # External table mapping a pattern integer to its scores; it replaces
        # an explicit evaluate-point function.
        self.pointCache = pointCache

        # Points near an edge have a "wall" within reach; encode it in their
        # patterns. Points at least 5 away from every edge are skipped.
        for i in range(self.height):
            for j in range(self.width):
                if 5 <= i < self.height - 5 and 5 <= j < self.width - 5:
                    continue
                for k, (di, dj) in enumerate(self._DIRECTIONS):
                    # A wall dd steps against the direction sits at offset
                    # -dd and is stored with exponent 5 + dd ...
                    dd = self._firstWall(i, j, -di, -dj)
                    if dd is not None:
                        self._addWall(i, j, k, 5 + dd)
                    # ... one dd steps along it at offset +dd, exponent 5 - dd.
                    dd = self._firstWall(i, j, di, dj)
                    if dd is not None:
                        self._addWall(i, j, k, 5 - dd)

        # Account for the stones that are already placed.
        for i in range(self.height):
            for j in range(self.width):
                if self.board[i, j] != R.empty:
                    self.updateScore((i, j))
                    self.allSteps.append((i, j))

    def updateScore(self, position, remove=False):
        """Add (or, with ``remove=True``, subtract) the stone currently stored
        at ``position`` to the patterns of every point within 5 steps along
        the four directions, then refresh the scores of those points.

        The stone value is read from ``self.board``, so the board cell must
        still hold the stone when this is called for a removal.
        """
        if_remove = -1 if remove else 1
        radius = 6
        player = self.board[position[0]][position[1]]
        updatedPositions = []

        for k, (di, dj) in enumerate(self._DIRECTIONS):
            # sign -1: points "before" the stone (distance 0 included, i.e.
            # the stone's own point); sign +1: points "after" it.
            for sign, first in ((-1, 0), (1, 1)):
                for dd in range(first, radius):
                    x = position[0] + sign * dd * di
                    y = position[1] + sign * dd * dj
                    if not self._onBoard(x, y):
                        break
                    delta = player * R.mm ** (5 + sign * dd) * if_remove
                    self.patternCache[R.AI][x][y][k] += delta
                    self.patternCache[R.opp][x][y][k] += delta
                    updatedPositions.append((x, y))

        # Re-score every touched point once all patterns are up to date.
        for p in updatedPositions:
            self.AIScore[p] = self.scorePoint(p, R.AI)
            self.oppScore[p] = self.scorePoint(p, R.opp)

    def scorePoint(self, position, player):
        """Return the score of ``position`` for ``player``: the sum of the
        pointCache entries of its four directional patterns.
        """
        patterns = self.patternCache[player][position[0]][position[1]]
        result = 0
        for direction in range(4):
            result += self.pointCache[patterns[direction]][player - 1]
        return result

    # ------------------------------------------------------------------
    # making and undoing moves
    # ------------------------------------------------------------------
    def put(self, position, player, record):
        """Place ``player``'s stone at ``position`` and update the hash.

        Scores and ``allSteps`` are only updated when ``record`` is true.
        """
        if config.debug:
            print(player, 'put [', position, ']')
        self.board[position] = player
        self.zobrist.go(position, player)
        if record:
            self.updateScore(position)
            self.allSteps.append(position)

    def last(self, player):
        """Return the most recent entry of ``allSteps`` whose board cell holds
        ``player``, or False if there is none.
        """
        # Walk from the newest step backwards.
        for p in reversed(self.allSteps):
            if self.board[p] == player:
                return p
        return False

    def remove(self, position):
        """Take the stone at ``position`` off the board, undoing ``put``.

        Pops the last entry of ``allSteps``; callers are expected to undo
        moves in reverse order.
        """
        r = self.board[position]
        if config.debug:
            print(r, 'remove [', position, ']')
        self.zobrist.go(position, r)
        # Scores must be rolled back while the cell still holds the stone.
        self.updateScore(position, remove=True)
        self.allSteps.pop()
        self.board[position] = R.empty

    def back(self):
        """Take back the last two moves recorded in ``self.steps``.

        Does nothing when fewer than two steps are recorded.
        NOTE(review): nothing in this class appends to ``self.steps``, so
        this is only effective if a caller maintains that list.
        """
        # TODO: undo support
        if len(self.steps) < 2:
            return
        for _ in range(2):
            # remove() toggles the hash, rolls the scores back while the
            # stone is still on the board, pops allSteps and clears the cell.
            self.remove(self.steps.pop())

    def logSteps(self):
        # TODO:
        pass

    # ------------------------------------------------------------------
    # evaluation
    # ------------------------------------------------------------------
    def evaluate(self):
        """Score the current position (not the position after a further move).

        Takes the best score among each side's occupied points, passes both
        through ``fixScore`` and returns AI minus opponent * ``attackRate``.
        """
        # Scores are non-negative, so 0 is a safe starting maximum.
        self.AIMaxScore = 0
        self.oppMaxScore = 0

        # A full scan for the maxima is cheap.
        for i in range(self.height):
            for j in range(self.width):
                if self.board[i, j] == R.AI:
                    self.AIMaxScore = max(self.AIScore[i, j], self.AIMaxScore)
                elif self.board[i, j] == R.opp:
                    self.oppMaxScore = max(self.oppScore[i, j],
                                           self.oppMaxScore)

        # Not strictly needed, but this correction reduces pointless
        # blocked-four moves by the AI.
        self.AIMaxScore = fixScore(self.AIMaxScore)
        self.oppMaxScore = fixScore(self.oppMaxScore)

        # TODO: check if a 1/-1 sign per player is needed
        result = self.AIMaxScore - self.oppMaxScore * self.attackRate
        return result

    def log(self):
        # TODO:
        pass

    # ------------------------------------------------------------------
    # candidate generation
    # ------------------------------------------------------------------
    def cache(self, result, onlyThree=False, player=None):
        """Remember ``result`` as the candidate list for the current board
        hash and ``player``. No-op unless ``config.cache`` is enabled.
        """
        if not config.cache:
            return
        table = self.gen3Cache if onlyThree else self.genCache
        table[self._cacheKey(player)] = result

    def getCache(self, onlyThree=False, player=None):
        """Return the cached candidate list for the current board hash and
        ``player``, or None when absent or when caching is disabled.
        """
        if not config.cache:
            return
        table = self.gen3Cache if onlyThree else self.genCache
        return table.get(self._cacheKey(player), None)

    def gen(self, player, onlyThrees=False, starSpread=False):
        """Return the empty points worth searching for ``player``, strongest
        first. Good ordering matters a lot for alpha-beta pruning.

        Only empty points with enough stones nearby are considered. Each is
        classified by the larger of its two scores against the thresholds in
        ``score`` (FIVE, FOUR, BLOCKED_FOUR, two THREEs, THREE, TWO).

        Args:
            player: side to move; its own points are listed before the other
                side's within each class.
            onlyThrees: if true, only points of class THREE or stronger are
                returned.
            starSpread: accepted for compatibility; the "star" search
                restriction is disabled (TODO in the original: unstable).
        """
        r = self.getCache(onlyThrees, player)
        if r:
            return r

        fives = []
        AIfours, oppfours = [], []
        AIblockedfours, oppblockedfours = [], []
        AItwothrees, opptwothrees = [], []
        AIthrees, oppthrees = [], []
        AItwos, opptwos = [], []
        neighbors = []

        # Early in the game one stone within distance 1 is enough; later two
        # stones within distance 2 are required (limits the search width).
        neighbor = (1, 1) if len(self.allSteps) <= 2 else (2, 2)

        for i in range(self.size):
            for j in range(self.size):
                if self.board[i][j] != R.empty:
                    continue
                p = (i, j)
                if not self.hasNeighbor(p, neighbor[0], neighbor[1]):
                    continue

                scoreOpp = self.oppScore[i][j]
                self.scoreHum[p] = scoreOpp
                scoreAI = self.AIScore[i][j]
                self.scoreCom[p] = scoreAI
                self.score[p] = max(scoreOpp, scoreAI)
                self.role[p] = player
                # Mark whether the point is an attacking or a defending one.
                self.attack[p] = R.AI if scoreAI > scoreOpp else R.opp

                if scoreAI >= scoreOpp:
                    if scoreAI >= score['FIVE']:
                        # The AI can complete five here: nothing beats that.
                        return [p]
                    elif scoreAI >= score['FOUR']:
                        AIfours.append(p)
                    elif scoreAI >= score['BLOCKED_FOUR']:
                        AIblockedfours.append(p)
                    elif scoreAI >= 2 * score['THREE']:
                        # A double three is strong as well.
                        AItwothrees.append(p)
                    elif scoreAI >= score['THREE']:
                        AIthrees.append(p)
                    elif scoreAI >= score['TWO']:
                        AItwos.append(p)
                    else:
                        neighbors.append(p)
                else:
                    if scoreOpp >= score['FIVE']:
                        # Do not return yet: the scan is not finished and the
                        # AI may still have a five of its own.
                        fives.append(p)
                    elif scoreOpp >= score['FOUR']:
                        oppfours.append(p)
                    elif scoreOpp >= score['BLOCKED_FOUR']:
                        oppblockedfours.append(p)
                    elif scoreOpp >= 2 * score['THREE']:
                        opptwothrees.append(p)
                    elif scoreOpp >= score['THREE']:
                        oppthrees.append(p)
                    elif scoreOpp >= score['TWO']:
                        opptwos.append(p)
                    else:
                        neighbors.append(p)

        # A five decides the game: return it alone.
        if fives:
            return self._remember(fives, onlyThrees, player)

        # Own open four available: play it, ignore blocked fours.
        if player == R.AI and AIfours:
            return self._remember(AIfours, onlyThrees, player)
        if player == R.opp and oppfours:
            return self._remember(oppfours, onlyThrees, player)

        # The other side has an open four and we have not even a blocked
        # four: only the other side's open fours matter.
        if player == R.AI and oppfours and not AIblockedfours:
            return self._remember(oppfours, onlyThrees, player)
        if player == R.opp and AIfours and not oppblockedfours:
            return self._remember(AIfours, onlyThrees, player)

        # The other side has an open four and we have a blocked four:
        # consider both, own points first.
        if player == R.AI:
            fours = AIfours + oppfours
            blockedfours = AIblockedfours + oppblockedfours
        else:
            fours = oppfours + AIfours
            blockedfours = oppblockedfours + AIblockedfours
        if fours:
            return self._remember(fours + blockedfours, onlyThrees, player)

        result = []
        if player == R.AI:
            result = (AItwothrees + opptwothrees
                      + AIblockedfours + oppblockedfours
                      + AIthrees + oppthrees)
        if player == R.opp:
            result = (opptwothrees + AItwothrees
                      + oppblockedfours + AIblockedfours
                      + oppthrees + AIthrees)

        # TODO: ideally everything would be searched; the list is capped.
        if len(result) > config.countLimit:
            result = result[:config.countLimit]

        # Double threes are special: a point forming one is not necessarily
        # stronger than a single open three, so stop extending here.
        if AItwothrees or opptwothrees:
            return self._remember(result, onlyThrees, player)

        # Caller asked for THREE-or-better points only.
        if onlyThrees:
            return self._remember(result, onlyThrees, player)

        twos = AItwos + opptwos if player == R.AI else opptwos + AItwos
        # Highest score first.
        twos.sort(key=lambda x: self.score.get(x, 0), reverse=True)
        result.extend(twos if twos else neighbors)

        # Low-scoring points need not all be searched.
        if len(result) > config.countLimit:
            result = result[:config.countLimit]
        return self._remember(result, onlyThrees, player)

    def hasNeighbor(self, position, distance, count):
        """Return True if at least ``count`` stones, not counting one on
        ``position`` itself, lie in the square of the given ``distance``
        around ``position``.

        Example for distance = 1: every 'X' is a neighbour of 'O'.
            XXX
            XOX
            XXX
        """
        startX = max(position[0] - distance, 0)
        endX = min(position[0] + distance + 1, self.size)
        startY = max(position[1] - distance, 0)
        endY = min(position[1] + distance + 1, self.size)
        occupied = np.sum(self.board[startX:endX, startY:endY] != R.empty)
        # The window includes the point itself; discount it when occupied.
        needed = count if self.board[position] == R.empty else count + 1
        return bool(occupied >= needed)

    # ------------------------------------------------------------------
    # search variant 1: negamax / get_value / max_value / min_value
    # ------------------------------------------------------------------
    def get_value(self, player, position, deep, alpha, beta):
        """Return the search value of ``player`` moving at ``position``.

        The stone is not on the board when this is called; it is placed,
        searched ``deep`` further plies, and removed again. Returns
        ``self.MAX`` / ``self.MIN`` for decided positions and the sentinel
        0.5 when the time limit has been exceeded.
        """
        # An immediate win needs no search.
        if self.win(player, position):
            return self.MAX if player == R.AI else self.MIN

        self.put(position, player, True)

        if self._timedOut():
            self.remove(position)
            return 0.5

        if deep <= 0:
            # Leaf: static evaluation, unless the other side already wins.
            r = self.evaluate()
            opponentWins = self.win(R.get_opponent(player))
            self.remove(position)
            if opponentWins:
                if player == R.AI:
                    return self.MIN
                if player == R.opp:
                    return self.MAX
            return r

        result = 0
        if player == R.AI:
            result = self.min_value(R.opp, deep - 1, alpha, beta)
        if player == R.opp:
            result = self.max_value(R.AI, deep - 1, alpha, beta)

        # Take the trial stone back before returning.
        self.remove(position)
        return result

    def max_value(self, player, deep, alpha, beta):
        """Maximising node: best value over ``player``'s candidate moves."""
        v = self.MIN
        for point in self.gen(player, starSpread=False):
            v = max(v, self.get_value(player, point, deep, alpha, beta))
            if v >= beta:  # pruning
                return v
            alpha = max(v, alpha)
        return v

    def min_value(self, player, deep, alpha, beta):
        """Minimising node: worst value over ``player``'s candidate moves."""
        v = self.MAX
        for point in self.gen(player, starSpread=False):
            v = min(v, self.get_value(player, point, deep, alpha, beta))
            if v <= alpha:  # pruning
                return v
            beta = min(v, beta)
        return v

    def negamax(self, deep):
        """Search ``deep`` plies for the AI and return ``(point, flag)``.

        ``flag`` is 1 when there was a single candidate and no search was
        run, otherwise 0. ``startTime`` must be set beforehand.
        """
        self.MIN = -1 * score['FIVE'] * 10
        self.MAX = score['FIVE'] * 10
        bestPoints = []
        best = self.MIN

        candidates = self.gen(R.AI, starSpread=False)
        if len(candidates) == 1:
            return candidates[0], 1

        for point in candidates:
            # Cut the search short once the time limit is exceeded.
            if self._timedOut():
                break
            v = self.get_value(R.AI, point, deep, self.MIN, self.MAX)
            if 0.2 < v < 0.8:  # time-out sentinel from get_value
                break
            if v == best:
                bestPoints.append(point)
            if v > best:
                best = v
                bestPoints = [point]

        if not bestPoints:
            # Timed out before any candidate was scored: fall back to the
            # first (heuristically strongest) candidate instead of crashing.
            bestPoints = candidates[:1]
        bestPoints.sort(key=lambda x: fixFour(self.AIScore[x]), reverse=True)
        result = bestPoints[0]
        return result, 0

    # ------------------------------------------------------------------
    # search variant 2: maxmin / get_min / get_max
    # ------------------------------------------------------------------
    def maxmin(self, deep):
        """Search ``deep`` plies for the AI and return one of the best
        points, chosen at random among equals.
        """
        self.MAX = score['FIVE'] * 10
        self.MIN = - score['FIVE'] * 10
        bestPoints = []
        best = self.MIN
        if config.debug:
            print(self.AIScore)

        # Candidate empty points to try.
        points = self.gen(R.AI, starSpread=True)
        if config.debug2:
            print(points)

        # A single candidate needs no search.
        if len(points) == 1:
            return points[0]

        for i, p in enumerate(points):
            if self._timedOut():
                if config.debug:
                    print('TIME OUT!')
                    print('Points left: {}'.format(points[i:]))
                break

            # Try the move ...
            if config.debug3:
                print("ROOT ===> AI takes : {} <=== ROOT".format(p))
            self.put(p, R.AI, True)
            v = self.get_min(R.opp, deep - 1, self.MIN, self.MAX)
            # ... and take it back.
            self.remove(p)
            if config.debug2:
                print("{} , score {}".format(p, v))

            if v == best:
                bestPoints.append(p)
            if v > best:
                best = v
                bestPoints = [p]

        if config.debug2:
            print(bestPoints)
        if not bestPoints:
            # Timed out before any candidate was scored: fall back to the
            # first candidate instead of crashing.
            bestPoints = points[:1]
        result_index = np.random.randint(len(bestPoints))
        result = bestPoints[result_index]
        return result

    def get_min(self, player, deep, alpha, beta):
        """Minimising node of the maxmin search for ``player``."""
        if deep <= 0:
            # Leaf: static evaluation of the current position.
            r = self.evaluate()
            if config.debug3:
                print('MIN Score ====== {} ======'.format(r))
                print()
            return r

        if config.debug3:
            print('MIN====== {} ======'.format(player))
        v = self.MAX
        points = self.gen(player, starSpread=True)
        if config.debug3:
            print('2 ===> ', points)
        for p in points:
            if config.debug3:
                print("OPP takes : {}".format(p))
            if self.win(player, p):
                return self.MIN
            self.put(p, player, True)
            v = min(v, self.get_max(R.get_opponent(player), deep - 1,
                                    alpha, beta))
            # Take the trial stone back.
            self.remove(p)
            if v <= alpha:  # pruning
                return v
            beta = min(beta, v)
        return v

    def get_max(self, player, deep, alpha, beta):
        """Maximising node of the maxmin search for ``player``."""
        if deep <= 0:
            r = self.evaluate()
            if config.debug3:
                print('MAX Score ====== {} ======'.format(r))
                print()
            return r

        if config.debug3:
            print('MAX====== {} ======'.format(player))
        v = self.MIN
        points = self.gen(player, starSpread=True)
        if config.debug3:
            print('1 ===> ', points)
        for p in points:
            if config.debug3:
                print("AI takes : {}".format(p))
            if self.win(player, p):
                return self.MAX
            self.put(p, player, True)
            v = max(v, self.get_min(R.get_opponent(player), deep - 1,
                                    alpha, beta))
            # Take the trial stone back.
            self.remove(p)
            if v >= beta:  # pruning
                return v
            alpha = max(alpha, v)
        return v

    def win(self, player, position=None):
        """Return ``player`` if its score reaches ``score['FIVE']``, else False.

        With ``position`` given only that point is checked; otherwise the
        maximum over the whole score table is used. An unknown ``player``
        yields False.
        """
        if player == R.AI:
            table = self.AIScore
        elif player == R.opp:
            table = self.oppScore
        else:
            return False

        if position is None:
            best = np.max(table)
        else:
            best = table[position[0]][position[1]]
        if best >= score['FIVE']:
            return player
        return False
class Board(object):
    """Game state for a square board whose stones are grouped into dragons.

    Players are identified by ``1`` and ``-1``; ``0`` marks an empty point.
    Actions are flat indices ``x * board_size + y``, plus ``PASS_INDEX``
    (``board_size ** 2``) for a pass.
    """

    def __init__(self, board_size, state=None, playerTurn=None):
        """Create an empty board.

        Args:
            board_size: Edge length of the square board.
            state: Accepted for interface compatibility; not used here.
            playerTurn: Player to move, ``1`` or ``-1``.

        Raises:
            NotImplementedError: If ``playerTurn`` is falsy (missing or 0).
        """
        if not playerTurn:
            raise NotImplementedError
        self.pieces = {'1': 'X', '0': '-', '-1': 'O'}
        self.playerTurn = playerTurn
        self.board_size = board_size
        self.PASS_INDEX = self.board_size**2
        # Dragon id -> Dragon instance.
        self.dragons = {}
        # Hashes of positions reached by act(); used to reject repeats.
        self.z_table = set()
        self.zobrist = Zobrist(self.board_size)
        self.positions = [[
            Position(x, y, self.board_size) for y in range(self.board_size)
        ] for x in range(self.board_size)]
        self.action_space = self.set_action_space()
        self.captures = {1: 0, -1: 0}
        self.passes = {1: False, -1: False}
        self.history = deque([], maxlen=14)
        self.zhash_history = deque([], maxlen=7)
        self._initialize_history()
        self.binary = self._binary()
        self.id = self._convertStateToId()
        self.allowedActions = self._allowedActions()
        self.isEndGame = self._checkForEndGame()
        self.value = self._getValue()
        self.score = self._getScore()
        self.newState = None

    @property
    def next_dragon(self):
        """Next unused dragon id: 1 for an empty table, else max id + 1."""
        if not self.dragons:
            return 1
        return max(self.dragons) + 1

    def set_action_space(self):
        """Return a zero vector with one slot per point plus one for pass."""
        return np.zeros(self.board_size * self.board_size + 1, dtype=int)

    def switch_player(self):
        """Hand the move to the other player."""
        self.playerTurn = self.playerTurn * -1

    def _empty_plane(self):
        """Return a ``board_size x board_size`` integer array of zeros."""
        # The builtin ``int`` is what the removed ``np.int`` alias meant.
        return np.zeros((self.board_size, self.board_size), dtype=int)

    def _occupancy_plane(self, player):
        """Return a 0/1 plane marking the points whose player is ``player``."""
        plane = self._empty_plane()
        for x, row in enumerate(self.positions):
            for y, pos in enumerate(row):
                if pos.player == player:
                    plane[x][y] = 1
        return plane

    def _initialize_history(self):
        """Fill the history deque with empty planes."""
        blank = self._empty_plane()
        # The same array object fills every slot, as before; update_history
        # only ever appends fresh arrays, so the sharing is never observed.
        for _ in range(14):
            self.history.append(blank)

    def player_as_layer(self):
        """Return a full-board plane filled with the current ``playerTurn``."""
        ones = np.ones((self.board_size, self.board_size), dtype=int)
        return ones * self.playerTurn

    def _checkForEndGame(self):
        """Return 1 when both players have passed, otherwise 0."""
        return 1 if all(self.passes.values()) else 0

    def _getValue(self):
        """Return the value tuple of this state for the player to move.

        Returns:
            ``(0, 0, 0)`` while at least one player has not passed. Once both
            have passed, ``(-1, -1, 1)`` if ``_score()`` names the player who
            is *not* to move, else ``(-1, 1, -1)``.
        """
        # Fixed: this used ``np.array(self.passes).all()``. Wrapping the dict
        # itself gives a 0-d object array that is always truthy, so _score()
        # (which mutates dragons and captures) ran on every call.
        if not all(self.passes.values()):
            return (0, 0, 0)
        winner = self._score()
        if winner == -1 * self.playerTurn:
            return (-1, -1, 1)
        return (-1, 1, -1)

    def _getScore(self):
        """Return the last two entries of ``self.value``."""
        current = self.value
        return (current[1], current[2])

    def _binary(self):
        """Return the flat 0/1 encoding: current player's plane, then other's."""
        mine = self._occupancy_plane(self.playerTurn).flatten()
        theirs = self._occupancy_plane(-1 * self.playerTurn).flatten()
        return np.append(mine, theirs)

    def _convertStateToId(self):
        """Build the string id of this state.

        The id is the digits of two occupancy planes, each followed by the
        two pass flags, then ``'-'`` and the allowed action indices joined
        by ``'-'``.
        """
        mover = self.playerTurn
        # The planes are inverted relative to _binary because the player is
        # switched before this is called.
        first = self._occupancy_plane(-1 * mover).flatten()
        second = self._occupancy_plane(mover).flatten()
        flags = np.array([
            1 if self.passes[mover] else 0,
            1 if self.passes[-1 * mover] else 0,
        ], dtype=int)
        position = np.concatenate([first, flags, second, flags])
        _id = ''.join(map(str, position))
        str_actions = '-'.join(map(str, self._allowedActions()))
        _id += '-' + str_actions
        return _id

    def dump_state_example(self):
        """Stack the history planes plus the player layer into one array."""
        planes = list(self.history)
        planes.append(self.player_as_layer())
        return np.stack(planes)

    def update_history(self):
        """Append the current planes for player 1 and player -1 to history."""
        self.history.append(self._occupancy_plane(1))
        self.history.append(self._occupancy_plane(-1))

    def act(self, loc):
        """Play a stone for the current player at ``loc``.

        Args:
            loc: ``(x, y)`` board coordinates.

        Returns:
            ``{'valid': bool, 'captures': {1: int, -1: int}}``. An occupied,
            suicidal or repeating move is reported as invalid and leaves the
            board untouched.
        """
        result = {'valid': True, 'captures': {1: 0, -1: 0}}
        pos = self.pos_by_location(loc)
        rv = self.imagine_position(pos, self.playerTurn)
        if rv['occupied'] or rv['suicide'] or rv['repeat']:
            result['valid'] = False
            return result

        friendly_dragons = list(rv['stitched'])
        touched_dragons = set()
        pos.occupy(self.playerTurn)
        if not friendly_dragons:
            # No friendly dragon to join: the stone starts its own.
            dragon_id = self.create_new_dragon()
            self.dragons[dragon_id].add_member(pos)
            touched_dragons.add(self.dragons[dragon_id])
        else:
            # Join the first friendly dragon and fold the rest into it.
            base_dragon = friendly_dragons[0]
            base_dragon.add_member(pos)
            for dragon in friendly_dragons[1:]:
                self.stitch_dragons(base_dragon.identifier,
                                    dragon.identifier)
            touched_dragons.add(base_dragon)
        for dragon in rv['captured']:
            result['captures'][self.playerTurn] += self.capture_dragon(
                dragon.identifier)
        touched_dragons.update(rv['opp_neighbor'])
        for dragon in touched_dragons:
            dragon.update()
        # A played stone cancels any earlier pass.
        self.passes = {1: False, -1: False}
        self.z_table.add(rv['zhash'])
        self.update_history()
        self.switch_player()
        self.allowedActions = self._allowedActions()
        self.id = self._convertStateToId()
        return result

    def take_action(self, loc):
        """Wrapper around act() for DRL use; always returns ``(0, 0)``."""
        done = 0
        value = 0
        self.act(loc)
        return value, done

    def player_pass(self):
        """Record a pass for the current player and switch turns.

        Returns:
            ``(value, done)``: ``(0, 0)`` unless both players have now
            passed. In that case ``done`` is 1 and ``value`` is 1 when
            ``_score()`` names the player who just passed, else -1.
        """
        # NOTE(review): takeAction() recomputes ``value`` with the opposite
        # sign convention once the game has ended - confirm which is meant.
        done = 0
        value = 0
        self.update_history()
        self.passes[self.playerTurn] = True
        self.switch_player()
        if self.passes[-1] and self.passes[1]:
            winner = self._score()
            value = 1 if winner == -1 * self.playerTurn else -1
            done = 1
            allowed_actions = []
        else:
            allowed_actions = self._allowedActions()
        self.value = self._getValue()
        self.score = self._getScore()
        self.allowedActions = allowed_actions
        self.id = self._convertStateToId()
        return value, done

    def takeAction(self, flat_array_index):
        """Apply an action to a copy of this board.

        Args:
            flat_array_index: ``x * board_size + y``, or ``PASS_INDEX``.

        Returns:
            ``(newState, value, done)``. When the copy has reached end of
            game, ``done`` is 1 and ``value`` is 1 if ``_score()`` names the
            copy's player to move, else -1.
        """
        # A pickle round-trip of our own object serves as a deep copy.
        self.newState = pickle.loads(pickle.dumps(self))
        if flat_array_index != self.PASS_INDEX:
            loc = (flat_array_index // self.board_size,
                   flat_array_index % self.board_size)
            value, done = self.newState.take_action(loc)
        else:
            value, done = self.newState.player_pass()
        if self.newState._checkForEndGame() == 1:
            self.newState.value = self.newState._getValue()
            self.newState.score = self.newState._getScore()
            winner = self.newState._score()
            value = 1 if winner == self.newState.playerTurn else -1
            done = 1
        return self.newState, value, done

    def add_up_score(self, player, dragons):
        """Add the member count of each dragon to ``player``'s captures."""
        for d in dragons:
            self.captures[player] += len(d.members)

    def _score(self):
        """Return the winner: 1 if player 1 has more captures, else -1.

        Side effect: the regions found by set_empty_dragons() are added to
        ``self.captures`` first. A tie goes to ``-1``.
        """
        regions = self.set_empty_dragons()
        for player in (-1, 1):
            self.add_up_score(player, regions[player])
        return 1 if self.captures[1] > self.captures[-1] else -1

    def allowed_plays(self, for_player):
        """Return the ``loc`` of every open point ``for_player`` may play.

        Open points that would be suicide or repeat a position are skipped.
        """
        allowed = []
        for row in self.positions:
            for pos in row:
                if pos.is_occupied:
                    continue
                outcome = self.imagine_position(pos, for_player)
                if outcome['suicide'] or outcome['repeat']:
                    continue
                allowed.append(pos.loc)
        return allowed

    def _allowedActions(self):
        """Return flat indices of the allowed plays, with pass last."""
        legal = [(x * self.board_size) + y
                 for x, y in self.allowed_plays(self.playerTurn)]
        legal.append(self.PASS_INDEX)
        return np.array(legal)

    def imagine_zobrist(self, pos, captures, player):
        """Hash the board as it would look after ``player`` plays ``pos``."""
        board = self.fake_board(pos, captures, player)
        return self.zobrist.get_hash(board, fake=True)

    def create_new_dragon(self):
        """Register an empty Dragon under the next free id and return the id."""
        dragon_id = self.next_dragon
        self.dragons[dragon_id] = Dragon(dragon_id, self)
        return dragon_id

    def pos_by_location(self, tup):
        """Return the Position instance at ``tup``.

        Args:
            tup: ``(x, y)`` index pair. No bounds check is done, so negative
                indices wrap around as usual for Python lists.
        """
        return self.positions[tup[0]][tup[1]]

    def stitch_dragons(self, d1_id, d2_id):
        """Merge dragon ``d2_id`` into dragon ``d1_id``.

        Args:
            d1_id: Key of the dragon that survives.
            d2_id: Key of the dragon that is absorbed and removed.

        Returns:
            The surviving Dragon instance.

        Raises:
            NotImplementedError: If no member of d2 is a neighbor of d1.
        """
        d1 = self.dragons[d1_id]
        d2 = self.dragons[d2_id]
        if not d1.neighbors.intersection(d2.members):
            raise NotImplementedError('Cannot merge unconnected dragons.')
        for member in d2.members:
            d1.add_member(member, force=True)
        del self.dragons[d2_id]
        return d1

    def get_neighboring_dragons(self, pos, player):
        """Return the set of ``player``'s dragons on points next to ``pos``."""
        found = set()
        for loc in pos.neighbors_locs:
            neighbor = self.pos_by_location(loc)
            if neighbor.dragon and neighbor.player == player:
                found.add(self.dragons[neighbor.dragon])
        return found

    def get_opposing_player(self, player):
        """Return the opponent: 0 for falsy input, -1 for 1, 1 otherwise."""
        if not player:
            return 0
        return -1 if player == 1 else 1

    def fake_board(self, pos, captures, player):
        """Return a nested list of player values after a hypothetical move.

        Args:
            pos: Position where ``player`` would play.
            captures: Dragons whose members would be removed.
            player: Value written at ``pos``.
        """
        fake = [[point.player for point in row] for row in self.positions]
        fake[pos.x][pos.y] = player
        for captured_dragon in captures:
            for capture in captured_dragon.members:
                fake[capture.x][capture.y] = 0
        return fake

    def imagine_position(self, pos, player):
        """Work out what playing ``pos`` would do, without changing the board.

        Args:
            pos: Position instance.
            player: ``1`` or ``-1``.

        Returns:
            dict with keys ``suicide``, ``occupied``, ``repeat`` (bools),
            ``zhash`` (hash of the resulting board, ``None`` if occupied),
            and ``captured``, ``opp_neighbor``, ``stitched`` (sets of Dragon
            instances).
        """
        rv = {
            'suicide': False,
            'occupied': False,
            'repeat': False,
            'zhash': None,
            'captured': set(),
            'opp_neighbor': set(),
            'stitched': set()
        }
        if pos.is_occupied:
            rv['occupied'] = True
            return rv

        opposing_dragons = self.get_neighboring_dragons(
            pos, self.get_opposing_player(player))
        self_dragons = self.get_neighboring_dragons(pos, player)
        neighbors = [self.pos_by_location(x) for x in pos.neighbors_locs]
        liberties = [x for x in neighbors if not x.is_occupied]

        # An opposing dragon whose only liberty is this point gets captured.
        for opp_dragon in opposing_dragons:
            if opp_dragon.liberties == {pos}:
                rv['captured'].add(opp_dragon)
            else:
                rv['opp_neighbor'].add(opp_dragon)

        check_for_suicide = self.imagine_stitched_valid(pos, self_dragons)
        if liberties or check_for_suicide:
            rv['stitched'] = self_dragons
        elif not rv['captured']:
            rv['suicide'] = True
        # NOTE(review): a capturing move with no liberties leaves 'stitched'
        # empty, so act() starts a fresh dragon instead of joining adjacent
        # friendly ones - confirm this is intended.

        rv['zhash'] = self.imagine_zobrist(pos, rv['captured'], player)
        if rv['zhash'] in self.z_table:
            rv['repeat'] = True
        return rv

    def imagine_stitched_valid(self, pos, dragons):
        """Return True if ``dragons`` together have a liberty besides ``pos``.

        Returns False for an empty ``dragons`` collection, or when the union
        of their liberties is exactly ``{pos}``.
        """
        if not dragons:
            return False
        liberties = set()
        for dragon in dragons:
            liberties.update(dragon.liberties)
        return liberties != {pos}

    def capture_dragon(self, dragon_id):
        """Remove a dragon from the board and return how many stones it had.

        Each member point is cleared, the opposing dragons next to those
        points are refreshed with ``update()``, and the dragon is dropped
        from ``self.dragons``.
        """
        doomed = self.dragons[dragon_id]
        captures = len(doomed.members)
        opposing_dragons = set()
        for pos in doomed.members:
            opposing_dragons.update(
                self.get_neighboring_dragons(
                    pos, self.get_opposing_player(doomed.player)))
            pos.player = 0
            pos.dragon = None
        for dragon in opposing_dragons:
            dragon.update()
        del self.dragons[dragon_id]
        return captures

    def set_empty_dragons(self):
        """Group empty points into dragons and sort them by bordering colour.

        Returns:
            dict with ``'all_dragons'`` (ids of the dragons created or
            extended here) and, under keys ``1`` and ``-1``, the dragons
            whose neighbors all report that single player value.

        Raises:
            NotImplementedError: If a point or id refers to a dragon missing
                from ``self.dragons``; diagnostics are printed first.
        """
        rv = {'all_dragons': set(), 1: set(), -1: set()}
        empty = [
            pos for row in self.positions for pos in row
            if not pos.is_occupied
        ]
        for pos1 in empty:
            if not pos1.dragon:
                n_dragons = list(self.get_neighboring_dragons(pos1, 0))
                if n_dragons:
                    # Join the first adjacent empty dragon, fold in the rest.
                    d1 = n_dragons[0]
                    d1.add_member(pos1)
                    rv['all_dragons'].add(d1.identifier)
                    for d in n_dragons[1:]:
                        other_id = d.identifier
                        self.stitch_dragons(d1.identifier, other_id)
                        rv['all_dragons'].discard(other_id)
                    dragon = d1
                else:
                    dragon_id = self.create_new_dragon()
                    dragon = self.dragons[dragon_id]
                    rv['all_dragons'].add(dragon.identifier)
                    dragon.add_member(pos1)
            else:
                try:
                    dragon = self.dragons[pos1.dragon]
                except KeyError:
                    print(self.dragons)
                    print(pos1.dragon)
                    print(pos1)
                    print(self.to_ascii())
                    raise NotImplementedError
            # Pull in every other empty point that borders this dragon.
            for pos2 in empty:
                if pos1 == pos2:
                    continue
                if pos2 in dragon.neighbors:
                    dragon.add_member(pos2)

        for d_id in rv['all_dragons']:
            try:
                d = self.dragons[d_id]
            except KeyError:
                print(rv['all_dragons'])
                print(d_id)
                print(self.to_ascii())
                raise NotImplementedError
            surr_color = {x.player for x in d.neighbors}
            if len(surr_color) == 1:
                rv[list(surr_color)[0]].add(d)
        return rv

    def to_ascii(self):
        """Return the board as text: 'x ' for 1, 'o ' for -1, '. ' otherwise."""
        lines = []
        for row in self.positions:
            cells = []
            for pos in row:
                if pos.player == 1:
                    cells.append('x ')
                elif pos.player == -1:
                    cells.append('o ')
                else:
                    cells.append('. ')
            lines.append(''.join(cells) + '\n')
        return ''.join(lines)

    def render(self, logger):
        """Log the ASCII board followed by a separator line."""
        logger.info(self.to_ascii())
        logger.info('--------------')