def play(self, game: GameState, verbose=False):
    """Play ``game`` to completion using the two action models.

    When ``self.alternate`` is truthy, ``actionModel1`` and ``actionModel2``
    are swapped on ``self`` before play begins; the swap persists after the
    call returns.

    On each ply the acting model is ``actionModel1`` when ``game.turn()`` is
    truthy and ``actionModel2`` otherwise. ``game`` is mutated in place via
    ``game.push``.

    Args:
        game: The game to play; advanced in place until ``game_over()``.
        verbose: When true, print the game before the first move and the
            chosen move, its evaluation and the game after every move.

    Returns:
        A tuple ``(winner, states, hs)``: ``game.winner()`` once the game is
        over, a list of copies of the game taken just before each move was
        pushed, and the list of evaluations the acting model returned with
        each move.
    """
    if self.alternate:
        self.actionModel1, self.actionModel2 = self.actionModel2, self.actionModel1

    positions, evaluations = [], []
    if verbose:
        print(game)

    while True:
        if game.game_over():
            break

        if game.turn():
            mover = self.actionModel1
        else:
            mover = self.actionModel2

        chosen, score = mover.action(game)
        # Snapshot the position before the move is applied to it.
        positions.append(game.__copy__())
        evaluations.append(score)
        game.push(chosen)

        if verbose:
            print("move: " + str(chosen) + " h: " + str(score))
            print(game)

    return game.winner(), positions, evaluations
def action(self, game: GameState):
    """Choose a move for the side to play in ``game``.

    Selection proceeds in this order:

    1. If ``self.win_heuristic`` reports an index other than ``-1`` for the
       child boards, that move is returned with an evaluation of ``1``.
    2. Otherwise the children are scored with ``self.heuristic.hs`` and
       passed through ``self.default_heuristic``.
    3. With ``self.variance`` truthy, a move is sampled with probability
       proportional to its score (``1 - score`` when ``game.turn()`` is
       falsy). Without it, the highest-scoring move is taken when
       ``game.turn()`` is truthy and the lowest-scoring one otherwise.

    Args:
        game: The position to act on; it is only queried, not modified.

    Returns:
        ``None`` if the game is already over, otherwise a tuple
        ``(move, value)``. In variance mode ``value`` is the normalised
        sampling weight of the chosen move, not its raw score.
    """
    if game.game_over():
        return None

    possibleMoves = game.legal_moves()
    boards = game.children()

    win_h = self.win_heuristic(boards)
    if win_h != -1:
        return possibleMoves[win_h], 1

    evals = self.heuristic.hs(boards)
    evals = self.default_heuristic(boards, evals)

    if self.variance:
        if not game.turn():
            # Invert so that low scores receive the larger sampling weight.
            evals = [1 - e for e in evals]

        total = sum(evals)
        if total == 0:
            # Every weight is zero: normalising would divide by zero, so
            # sample uniformly instead.
            evals = [1 / len(evals) for _ in evals]
        else:
            evals = [e / total for e in evals]

        choice = random.random()
        # Rounding can leave the cumulative sum a hair below 1.0; default to
        # the last move so a draw above that sum still selects something.
        bestIdx = len(evals) - 1
        s = 0
        for idx, e in enumerate(evals):
            s += e
            if choice <= s:
                bestIdx = idx
                break
    elif game.turn():
        bestIdx = np.argmax(evals)
    else:
        bestIdx = np.argmin(evals)

    return possibleMoves[bestIdx], evals[bestIdx]