def build_book(book, num_rounds=100):
    """Populate an opening book by self-play.

    Plays ``num_rounds`` games of Isolation.  The first four plies of each
    game are chosen uniformly at random and recorded; the remainder of the
    game is played out with alpha-beta search.  Every recorded
    (state, action) pair is then credited +1 when the player who made the
    move went on to win the game, and -1 otherwise.

    NOTE(review): ``book`` is indexed as ``book[state][action] += ...`` so it
    is presumably a nested defaultdict of integer counters — confirm at the
    caller.

    :param book: mutable mapping of state -> action -> running score,
        updated in place
    :param num_rounds: number of self-play games to simulate
    :return: the same ``book`` object
    """
    for _ in range(num_rounds):
        game = Isolation()
        opening = []
        # Randomized opening: record the first four plies (ply 0..3) so the
        # book learns which openings lead to wins.
        while game.ply_count <= 3:
            move = random.choice(game.actions())
            opening.append((game, game.player(), move))
            game = game.result(move)
        # Finish the game with alpha-beta search for both sides.
        while not game.terminal_test():
            move = alpha_beta(game, game.player())
            game = game.result(move)
        player0_won = game.utility(0) > 0
        player1_won = game.utility(1) > 0
        assert player0_won != player1_won  # exactly one side must win
        for recorded_state, mover, move in opening:
            # Credit +1 when the mover is on the winning side, -1 otherwise.
            credit = 1 if player0_won == (mover == 0) else -1
            book[recorded_state][move] += credit
    return book
def _simulation(self, state: Isolation, leaf_player_id) -> float:
    """Roll the game out with uniformly random moves until it ends.

    :param state: position to start the rollout from
    :param leaf_player_id: player whose point of view the result is scored
        from (the owner of the expanded leaf node)
    :return: terminal utility of the finished game for ``leaf_player_id``
    """
    rollout = state
    while not rollout.terminal_test():
        rollout = rollout.result(random.choice(rollout.actions()))
    return rollout.utility(leaf_player_id)
class GenomeTester:
    """Evaluate a fixed opening-move sequence (a "genome") by playing it out.

    Player 0 opens on ``init_cell``, then follows ``genome`` move by move,
    falling back to depth-limited minimax when a genome move is illegal or
    the genome is exhausted.  Player 1 always plays minimax after a random
    first move.  ``run`` returns a fitness score for the genome.
    """

    def __init__(self, init_cell, genome, search_depth):
        # NOTE(review): `assert` is stripped under `python -O`; presumably
        # acceptable here since this is an offline evaluation harness.
        # 114 appears to be the highest valid board-cell index — confirm
        # against the Isolation board encoding.
        assert (init_cell <= 114) and (init_cell >= 0), "Invalid opening cell value"
        self.init_cell = init_cell          # player 0's forced opening cell
        self.board = Isolation()            # fresh game state
        self.player0_moves = 0              # moves made by player 0 so far
        self.player1_moves = 0              # moves made by player 1 so far
        self.genome = genome                # scripted move sequence for player 0
        self.search_depth = search_depth    # minimax depth for fallback search
        self.active_player = 0              # whose turn it is (0 or 1)
        self.move_history = []              # moves recorded while the genome is active

    def run(self):
        """Play one full game and return ``(genome, fitness)``.

        Fitness is -inf on a loss, ``NEG_INF_INT`` if the game somehow
        failed to finish, and otherwise the negated total move count
        (shorter wins score higher).
        """
        ############################
        ####### mini max ##########
        def minimax(state, depth, player_id):
            # Depth-limited minimax; returns the action maximizing the
            # min-value of the resulting position. Ties are broken by the
            # ordering of state.actions(), which callers rely on implicitly.
            def min_value(state, depth, player_id):
                if state.terminal_test():
                    return state.utility(player_id)
                if depth <= 0:
                    return score(state, player_id)
                value = float("inf")
                for action in state.actions():
                    value = min(
                        value,
                        max_value(state.result(action), depth - 1, player_id))
                return value

            def max_value(state, depth, player_id):
                if state.terminal_test():
                    return state.utility(player_id)
                if depth <= 0:
                    return score(state, player_id)
                value = float("-inf")
                for action in state.actions():
                    value = max(
                        value,
                        min_value(state.result(action), depth - 1, player_id))
                return value

            return max(
                state.actions(),
                key=lambda x: min_value(state.result(x), depth - 1, player_id))

        def score(state, player_id):
            # Heuristic: mobility difference (own liberties minus
            # opponent liberties) at the search horizon.
            own_loc = state.locs[player_id]
            opp_loc = state.locs[1 - player_id]
            own_liberties = state.liberties(own_loc)
            opp_liberties = state.liberties(opp_loc)
            return len(own_liberties) - len(opp_liberties)
        ####### mini max ##########
        ############################

        # Opening: player 0 plays the fixed init_cell, player 1 replies
        # at random. Neither move counter is incremented here — the genome
        # index therefore starts at the move *after* the opening.
        if self.player0_moves == 0:
            self.board = self.board.result(self.init_cell)
        if self.player1_moves == 0:
            self.board = self.board.result(random.choice(self.board.actions()))

        while not self.board.terminal_test():
            if self.active_player == 0:
                if self.player0_moves < len(self.genome):
                    # Follow the scripted genome while moves remain.
                    next_move = self.genome[self.player0_moves]
                    if next_move not in self.board.actions():
                        # move is most likely blocked (not as bad as a loss)
                        #return self.genome, NEG_INF_INT
                        next_move = minimax(self.board, self.search_depth, player_id=0)
                else:
                    # Genome exhausted: fall back to search.
                    next_move = minimax(self.board, self.search_depth, player_id=0)
                self.player0_moves += 1
                self.active_player = 1
            else:
                next_move = minimax(self.board, self.search_depth, player_id=1)
                self.player1_moves += 1
                self.active_player = 0
            self.board = self.board.result(next_move)
            # NOTE(review): this records moves of BOTH players while the
            # genome is still active (the guard checks player0_moves only) —
            # confirm that is intended and not meant to log player 0 only.
            if self.player0_moves < len(self.genome):
                self.move_history.append(next_move)

        player0_score = self.board.utility(player_id=0)
        if player0_score < 0:
            # lost
            return self.genome, float("-inf")
        elif player0_score == 0:
            # game didnt finish
            return self.genome, NEG_INF_INT
        else:
            # Won: reward shorter games (fewer total moves => higher fitness).
            return self.genome, -1.0 * (self.player0_moves + self.player1_moves)