def evaluate_all_valid_moves(self, board, player_id, config=None):
    """Estimate the reward for each possible move.

    Returns a dict mapping every valid move on `board` to the score
    produced by ``self.evaluate_action`` for that move.
    """
    scores = {}
    for candidate in get_valid_moves(board, config):
        scores[candidate] = self.evaluate_action(board, candidate, player_id, config)
    return scores
def minimax_score(board, player_to_move, player_to_optimize, cache={}):
    """Recursively score `board` for `player_to_optimize` using minimax.

    `player_to_move` is the player whose turn it is on this board.
    Returns +10 for a win by `player_to_optimize`, -10 for a loss, and
    0 for a tie; interior nodes take the max (our turn) or min
    (opponent's turn) over the child scores.

    NOTE: the mutable default ``cache={}`` is intentional — it memoizes
    positions across calls for the lifetime of the process.  Callers may
    also supply their own dict.
    """
    board_cache_key = str(board)
    if board_cache_key in cache:
        return cache[board_cache_key]

    winner = utils.get_winning_player(board)
    if winner is not None:
        # The winner is whoever made the *previous* move, so it can never
        # equal `player_to_move`.  Any decided board that is not a win for
        # `player_to_optimize` is therefore a loss for them.  (The original
        # compared `winner == player_to_move`, an unreachable condition, so
        # lost positions fell through and kept recursing on finished boards.)
        return 10 if winner == player_to_optimize else -10
    elif utils.check_for_tie(board):
        return 0

    valid_moves = utils.get_valid_moves(board)
    scores = []
    for move in valid_moves:
        _board = copy.deepcopy(board)
        _board = utils.make_move(_board, move, player_to_move)
        opponent = 'O' if player_to_move == 'X' else 'X'
        score = minimax_score(_board, opponent, player_to_optimize, cache)
        scores.append(score)

    if player_to_move == player_to_optimize:
        # No children should only happen on a full board; default pessimistic.
        cache[board_cache_key] = max(scores) if len(scores) > 0 else -10
    else:
        cache[board_cache_key] = min(scores) if len(scores) > 0 else 10
    return cache[board_cache_key]
def find_winning_move_helper(board, player):
    """Return a move that immediately wins the game for `player`, or None.

    Each candidate move is tried on a *fresh* copy of the board.  (The
    original made a single ``deepcopy`` outside the loop, so trial moves
    accumulated on one shared board across iterations, and it also called
    a bare ``make_move`` — a NameError unless imported separately — where
    every sibling function uses ``utils.make_move``.)
    """
    for move in utils.get_valid_moves(board):
        _board = copy.deepcopy(board)  # fresh copy per candidate move
        if utils.get_winner(utils.make_move(_board, move, player), player):
            return move
    return None
def minimax_ai(board, player):
    """Pick the valid move with the highest minimax score for `player`.

    Ties are broken in favor of the earliest move returned by
    ``utils.get_valid_moves``.
    """
    opponent = 'O' if player == 'X' else 'X'
    best_move, best_score = None, None
    for candidate in utils.get_valid_moves(board):
        trial_board = copy.deepcopy(board)
        utils.make_move(trial_board, candidate, player)
        candidate_score = minimax_score(trial_board, opponent, player)
        if best_score is None or candidate_score > best_score:
            best_move, best_score = candidate, candidate_score
    return best_move
def select_action(self, board, player_id, config=None):
    """Choose an action for the given board configuration.

    A greedy agent always picks the best action.  Otherwise an
    epsilon-greedy policy is used, which requires
    ``self.get_exploration_factor`` to be implemented.
    """
    if self.greedy:
        return self.select_best_action(board, player_id, config)

    if random.random() > self.get_exploration_factor():
        # Exploit: take the best-known action.
        return self.select_best_action(board, player_id, config)
    # Explore: pick a valid move uniformly at random.
    return random.choice(get_valid_moves(board, config))
def random_ai(board, player):
    """Pick a uniformly random valid move; `player` is unused."""
    valid_moves = utils.get_valid_moves(board)
    return random.choice(valid_moves)