Example #1
    def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):
        print('-', end=' ')  # trace marker printed once per call
        # BEGIN_YOUR_CODE
        if game.is_end(state):
            return game.utility(state)

        actions = game.get_possible_actions(state)

        player = game.get_player_from_state(state)
        if player == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(
                    value,
                    self.V(game.get_next_state(state, action), alpha, beta))
                alpha = max(alpha, value)
                if beta <= alpha: break
        else:
            value = game.INT_INF
            for action in actions:
                value = min(
                    value,
                    self.V(game.get_next_state(state, action), alpha, beta))
                beta = min(beta, value)
                if beta <= alpha: break

        return value
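
The game module these snippets depend on is not shown. A minimal self-contained sketch of the same alpha-beta recursion on a hard-coded two-ply tree (all names here are illustrative, not from the original module):

INF = float('inf')

# Leaf utilities of a tiny two-ply tree: MAX picks a subtree, MIN then
# picks a leaf inside it.
TREE = [[3, 5, 10], [2, 12, 4], [1, 8, 7]]

def alphabeta(node, maximizing, alpha=-INF, beta=INF):
    if isinstance(node, int):      # leaf: return its utility
        return node
    if maximizing:
        value = -INF
        for child in node:
            value = max(value, alphabeta(child, False, alpha, beta))
            alpha = max(alpha, value)
            if beta <= alpha:      # MIN will never let play reach here
                break
    else:
        value = INF
        for child in node:
            value = min(value, alphabeta(child, True, alpha, beta))
            beta = min(beta, value)
            if beta <= alpha:      # MAX already has a better alternative
                break
    return value

print(alphabeta(TREE, True))  # 3: once a subtree's minimum falls below alpha, its remaining leaves are pruned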
Example #2
    def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):

        # If IsEnd(s)
        if game.is_end(state):
            return game.utility(state)

        # Get possible actions
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        # If player == agent (maximizing player)
        if game.get_player_from_state(state) == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(value, self.V(game.get_next_state(state, action), alpha, beta))
                alpha = max(alpha, value)
                if beta <= alpha: break

        # If player == opponent (minimizing player)
        else:
            value = game.INT_INF
            for action in actions:
                value = min(value, self.V(game.get_next_state(state, action), alpha, beta))
                beta = min(beta, value)
                if beta <= alpha: break

        return value
Example #3
def create_policytable() -> Dict[GameState, Tuple[int, MoveList]]:
    """Create policytable by iterating through game state space.

    Returns:
        Dict[GameState, Tuple[int, MoveList]]: Map of state to utility and list of moves
    """
    space = game.create_statespace()
    pol_tab = {}

    for state in space:
        move_list = []

        # Convert 1x10 vector into game state tuple
        state = (state[0], np.asarray(state[1:]).reshape(3, 3))

        # Walk the optimal line of play from this state, recording each move
        s = state
        while not game.is_terminal(s):
            next_move = minimax_search(s)
            move_list.append(next_move)
            s = game.result(s, next_move)

        u = game.utility(s)

        pol_tab[tuple(game.to_vector(state))] = (u, move_list)

    return pol_tab
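
A hypothetical consumer of the returned table, assuming game.to_vector produces the same key encoding used above (some_state is an illustrative name):

table = create_policytable()
key = tuple(game.to_vector(some_state))   # some_state: any state in the space
utility, moves = table[key]
print('value under optimal play:', utility)
print('optimal move sequence:', moves)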
Example #4
    def V(self, state):
        # If IsEnd(s)
        if game.is_end(state):
            return game.utility(state)

        # Get possible actions
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        # If player == agent (maximizing player)
        if game.get_player_from_state(state) == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(value,
                            self.V(game.get_next_state(state, action)))

        # If player == opponent (minimizing player)
        else:
            value = game.INT_INF
            for action in actions:
                value = min(value,
                            self.V(game.get_next_state(state, action)))

        return value
Example #5
    def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF, verbose=0):
        if verbose >= 1:
            print('call V({})'.format(state))

        # If IsEnd(s)
        if game.is_end(state):
            return game.utility(state)

        # Get possible actions
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        # If player == agent (maximizing player)
        if game.get_player_from_state(state) == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(value,
                            self.V(game.get_next_state(state, action),
                                   alpha, beta, verbose=verbose))
                alpha = max(alpha, value)
                if beta <= alpha: break

        # If player == opponent (minimizing player)
        else:
            value = game.INT_INF
            for action in actions:
                value = min(value,
                            self.V(game.get_next_state(state, action),
                                   alpha, beta, verbose=verbose))
                beta = min(beta, value)
                if beta <= alpha: break

        return value
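
Because verbose threads through every recursive call, a single call at the root traces the entire search. A hypothetical call site (agent and start_state are illustrative names):

value = agent.V(start_state, verbose=1)  # prints 'call V(...)' for each visited state
print('game value:', value)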
Example #6
    def V(self, state, depth):
        # If IsEnd(s)
        if game.is_end(state):
            return game.utility(state)

        # If depth = 0
        if depth == 0:
            # print(game.get_board_str(state), eval(state))
            return eval(state)

        # Get possible actions
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        # If player == agent (maximizing player)
        if game.get_player_from_state(state) == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(value,
                            self.V(game.get_next_state(state, action), depth))

        # If player == opponent (minimizing player)
        else:
            value = game.INT_INF
            for action in actions:
                value = min(
                    value, self.V(game.get_next_state(state, action),
                                  depth - 1))

        return value
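
Note that depth is decremented only after the minimizing player's move, so it counts full move pairs, and eval here is the module's static evaluation function, not the Python builtin. A hypothetical shape for such a function (game.get_score is an assumed helper, not part of the shown API):

# Hypothetical static evaluation used at the depth cutoff; the real
# eval() is defined elsewhere in the module.
def eval(state):
    # Score the position from MAX's point of view; the value must stay
    # strictly inside (-game.INT_INF, game.INT_INF) so that true wins
    # and losses found by game.utility still dominate.
    return game.get_score(state)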
Example #7
    def V(self, state, depth, verbose=0):
        if verbose >= 1:
            print('call V({})'.format(state))

        # If IsEnd(s)
        if game.is_end(state):
            return game.utility(state)

        # If depth = 0
        if depth == 0:
            # print(game.get_board_str(state), eval(state))
            return eval(state)

        # Get possible actions
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        # If player == agent (maximizing player)
        if game.get_player_from_state(state) == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(value,
                            self.V(game.get_next_state(state, action),
                                   depth, verbose=verbose))

        # If player == opponent (minimizing player)
        else:
            value = game.INT_INF
            for action in actions:
                value = min(value,
                            self.V(game.get_next_state(state, action),
                                   depth - 1, verbose=verbose))

        return value
Example #8
    def V(self, state):
        # BEGIN_YOUR_CODE
        if game.is_end(state):
            return game.utility(state)
        player = game.get_player_from_state(state)
        if player == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in game.get_possible_actions(state):
                value = max(value, self.V(game.get_next_state(state, action)))
        else:
            value = game.INT_INF
            for action in game.get_possible_actions(state):
                value = min(value, self.V(game.get_next_state(state, action)))

        return value
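
V returns only the game value; choosing the actual move takes one extra argmax at the root. A sketch under the same assumed game API (get_best_action is a hypothetical method, not from the original class):

    def get_best_action(self, state):
        actions = game.get_possible_actions(state)
        # The maximizing player takes the action with the highest V;
        # switch to min() when choosing for the minimizing player.
        return max(actions,
                   key=lambda a: self.V(game.get_next_state(state, a)))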
Example #9
    def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):
        # BEGIN_YOUR_CODE
        # alpha/beta are accepted for interface compatibility but unused:
        # averaging over opponent moves admits no alpha-beta pruning.
        if game.is_end(state):
            return game.utility(state)

        actions = game.get_possible_actions(state)
        if game.get_player_from_state(state) == game.MAX_PLAYER:  # agent's turn: maximize
            value = -game.INT_INF
            for action in actions:
                value = max(value, self.V(game.get_next_state(state, action)))
        else:  # opponent's turn, modeled as uniformly random (expectimax)
            value = 0
            for action in actions:
                value += self.V(game.get_next_state(state,
                                                    action)) / len(actions)

        return value
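
Despite the alpha/beta parameters in its signature, this variant is expectimax: the opponent is modeled as uniformly random, branch values are averaged, and no pruning is possible. A self-contained toy sketch (names are illustrative, not from the original module):

TREE = [[0, 0, 12], [1, 2, 3]]

def expectimax(node, maximizing):
    if isinstance(node, int):          # leaf: utility
        return node
    values = [expectimax(c, not maximizing) for c in node]
    if maximizing:
        return max(values)
    return sum(values) / len(values)   # expectation over a random opponent

print(expectimax(TREE, True))  # 4.0: [0, 0, 12] averages 4, beating [1, 2, 3]'s 2

Minimax would instead value these subtrees at 0 and 1 and prefer the second branch, which illustrates how the opponent model changes the chosen move.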
Example #10
    def V(self, state):
        # If IsEnd(s)
        if game.is_end(state):
            return game.utility(state)

        # Get possible actions
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        # If player == agent (maximizing player)
        if game.get_player_from_state(state) == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(value,
                            self.V(game.get_next_state(state, action)))

        # If player == opponent (minimizing player)
        else:
            value = game.INT_INF
            for action in actions:
                value = min(value,
                            self.V(game.get_next_state(state, action)))

        return value
Example #11
    def V(self, state, depth):
        # BEGIN_YOUR_CODE
        if game.is_end(state):
            return game.utility(state)
        if depth == 0:
            return eval(state)

        if game.get_player_from_state(state) == game.MAX_PLAYER:  # my-turn
            value = -game.INT_INF
            for action in game.get_possible_actions(state):
                value = max(value,
                            self.V(game.get_next_state(state, action), depth))
        else:  # opp-turn
            value = game.INT_INF
            for action in game.get_possible_actions(state):
                value = min(
                    value, self.V(game.get_next_state(state, action),
                                  depth - 1))

        return value
Example #12
def max_value(state: np.ndarray) -> Tuple[int, GameMove]:
    """Look for the move generating the maximum value.

    Args:
        state (np.ndarray): Current state

    Returns:
        Tuple[int, GameMove]: Tuple of value and move
    """
    move = None
    if game.is_terminal(state):
        return game.utility(state), move

    v = -20                      # sentinel below any reachable utility
    for act in game.actions(state):
        v2, _ = min_value(game.result(state, act))

        if v2 > v:
            v = v2
            move = act

    return v, move
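
max_value calls min_value, which this excerpt does not include. A symmetric sketch under the same assumed API:

def min_value(state: np.ndarray) -> Tuple[int, GameMove]:
    """Mirror of max_value: look for the move generating the minimum value.

    A sketch assuming the same game API as max_value above.
    """
    move = None
    if game.is_terminal(state):
        return game.utility(state), move

    v = 20                       # sentinel above any reachable utility
    for act in game.actions(state):
        v2, _ = max_value(game.result(state, act))
        if v2 < v:
            v = v2
            move = act

    return v, move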
Example #13
import random

infinity = float('inf')


def alphabeta_cutoff_search(state,
                            game,
                            d=4,
                            cutoff_test=None,
                            eval_fn=None,
                            extra_fn=None):
    """Search game to determine best action; use alpha-beta pruning.
    This version cuts off search and uses an evaluation function."""

    player = game.to_move(state)

    # Functions used by alphabeta
    def max_value(state, alpha, beta, depth):
        if cutoff_test(state, depth):
            return eval_fn(state, player)
        v = -infinity
        for a in game.actions(state):
            v = max(
                v,
                min_value(game.result(state, a, player, extra_fn), alpha, beta,
                          depth + 1))
            if v >= beta:
                return v
            alpha = max(alpha, v)
        return v

    def min_value(state, alpha, beta, depth):
        if cutoff_test(state, depth):
            return eval_fn(state, player)
        v = infinity
        for a in game.actions(state):
            v = min(
                v,
                max_value(game.result(state, a, player, extra_fn), alpha, beta,
                          depth + 1))
            if v <= alpha:
                return v
            beta = min(beta, v)
        return v

    # Body of alphabeta_cutoff_search starts here:
    # The default test cuts off at depth d or at a terminal state
    cutoff_test = (
        cutoff_test
        or (lambda state, depth: depth >= d or game.terminal_test(state)))
    eval_fn = eval_fn or (lambda state, player: game.utility(state, player))
    extra_fn = extra_fn or (lambda st1: st1.extra)
    #print("Well, I am inside alphabeta and i am going to apply...",extra_fn)
    best_score = -infinity
    beta = infinity
    best_action = None
    movimentos = game.actions(state)
    if len(movimentos) == 1:
        return movimentos[0]
    else:
        random.shuffle(movimentos)  # shuffle to vary play between games
        for a in movimentos:
            v = min_value(game.result(state, a, player, extra_fn), best_score,
                          beta, 1)
            if v > best_score:
                best_score = v
                best_action = a
        return best_action
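
A hypothetical call site, assuming an aima-python-style game object exposing actions, result, utility, terminal_test and to_move (my_game and initial_state are illustrative names):

best = alphabeta_cutoff_search(initial_state, my_game, d=4)
print('chosen action:', best)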