Example #1
    def evaluate_all_valid_moves(self, board, player_id, config=None):
        """Estimate the reward of every valid move; returns a {move: score} dict."""
        scores = {
            move: self.evaluate_action(board, move, player_id, config)
            for move in get_valid_moves(board, config)
        }
        return scores
Example #2
import copy
import utils  # project-local helpers; adjust the import path to your project


def minimax_score(board, player_to_move, player_to_optimize, cache={}):
    # The mutable default argument is deliberate: the same dict persists across
    # calls, so positions scored in earlier games are reused. The key includes
    # both players so agents optimizing for different sides do not collide.
    board_cache_key = (str(board), player_to_move, player_to_optimize)
    if board_cache_key in cache:
        return cache[board_cache_key]

    # Terminal positions: +10 for a win by the optimizing player,
    # -10 for a win by the opponent, 0 for a tie.
    winner = utils.get_winning_player(board)
    if winner is not None:
        if winner == player_to_optimize:
            return 10
        else:
            return -10
    elif utils.check_for_tie(board):
        return 0

    # Recursively score every move available to the player whose turn it is.
    valid_moves = utils.get_valid_moves(board)
    scores = []
    for move in valid_moves:
        _board = copy.deepcopy(board)
        _board = utils.make_move(_board, move, player_to_move)
        opponent = 'O' if player_to_move == 'X' else 'X'
        score = minimax_score(_board, opponent, player_to_optimize, cache)
        scores.append(score)

    # The optimizing player takes the best score; the opponent takes the worst.
    if player_to_move == player_to_optimize:
        cache[board_cache_key] = max(scores) if len(scores) > 0 else -10
    else:
        cache[board_cache_key] = min(scores) if len(scores) > 0 else 10

    return cache[board_cache_key]
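A quick usage sketch for the evaluator above. The 3x3 list-of-lists board with None marking empty squares is an assumed representation; the function itself only requires whatever format the `utils` helpers expect.

# An empty board under the assumed representation.
empty_board = [[None] * 3 for _ in range(3)]
# Tic-tac-toe is a draw under perfect play, so this should print 0.
print(minimax_score(empty_board, player_to_move='X', player_to_optimize='X'))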
Example #3
import copy
import utils

def find_winning_move_helper(board, player):
    valid_moves = utils.get_valid_moves(board)
    for move in valid_moves:
        # Copy the board for each candidate so trial moves do not accumulate.
        _board = copy.deepcopy(board)
        if utils.get_winner(utils.make_move(_board, move, player), player):
            return move
    return None
Example #4
import copy
import utils


def minimax_ai(board, player):
    # Try every valid move, score the resulting position with minimax_score
    # (the recursive evaluator from Example #2), and keep the best one.
    valid_moves = utils.get_valid_moves(board)
    best_move = None
    best_score = None
    for move in valid_moves:
        _board = copy.deepcopy(board)
        utils.make_move(_board, move, player)
        opponent = 'O' if player == 'X' else 'X'
        # Score the position from the opponent's turn, still optimizing for `player`.
        score = minimax_score(_board, opponent, player)
        if best_score is None or score > best_score:
            best_move = move
            best_score = score
    return best_move
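A quick illustration of the wrapper above, using the same assumed list-of-lists board; the move format (presumably a (row, column) pair) depends on what `utils.get_valid_moves` returns.

# 'O' threatens the main diagonal; the only move that does not lose for 'X'
# is the bottom-right corner, which also creates a double threat, so
# minimax_ai should return that square.
board = [
    ['O', None, None],
    [None, 'O', 'X'],
    [None, 'X', None],
]
print(minimax_ai(board, 'X'))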
Example #5
    def select_action(self, board, player_id, config=None):
        """
        Choose an action for the given board configuration.
        If the agent is not greedy, an epsilon-greedy policy is used
        -- this requires `self.get_exploration_factor` to be implemented.
        """
        if self.greedy:
            action = self.select_best_action(board, player_id, config)
            return action

        sample = random.random()
        epsilon = self.get_exploration_factor()
        if sample > epsilon:
            # Exploit: take the best-known action.
            action = self.select_best_action(board, player_id, config)
        else:
            # Explore: play a random valid move.
            valid_moves = get_valid_moves(board, config)
            action = random.choice(valid_moves)

        return action
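The method above relies on `self.get_exploration_factor`, which is not shown. A common choice is an exponentially decaying epsilon; the sketch below is a hypothetical implementation, and the attribute names (`epsilon_start`, `epsilon_end`, `epsilon_decay`, `steps_done`) are made up rather than part of the original agent.

    def get_exploration_factor(self):
        # Hypothetical schedule: decay epsilon from self.epsilon_start toward
        # self.epsilon_end as more actions are selected, so the agent explores
        # early and exploits later (assumes `import math` at module level and
        # that these attributes are initialised in __init__).
        epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * math.exp(
            -self.steps_done / self.epsilon_decay
        )
        self.steps_done += 1
        return epsilon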
Example #6
def random_ai(board, player):
    return random.choice(utils.get_valid_moves(board))
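Tying the module-level functions together, a minimal game-loop sketch. It assumes the functions above sit in one file with `import copy`, `import random`, and `import utils` at the top, that the board is a 3x3 list of lists with None for empty squares, and that `utils.make_move` returns the updated board.

def play_game():
    board = [[None] * 3 for _ in range(3)]
    player = 'X'
    # 'X' plays randomly, 'O' plays perfect minimax.
    while utils.get_winning_player(board) is None and not utils.check_for_tie(board):
        move = random_ai(board, player) if player == 'X' else minimax_ai(board, player)
        board = utils.make_move(board, move, player)
        player = 'O' if player == 'X' else 'X'
    # 'O' or None for a tie; the minimax player never loses.
    return utils.get_winning_player(board)

print(play_game())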