Example #1
    def alphabeta(self, state, depth, alpha, beta, max_player, timer):
        # Transposition-table lookup: on a miss, process the state once and cache it.
        state_proc = self.proc_state_table.get(state)
        if state_proc is None:
            state_proc = env.AlphaStateProcessor(state)
            state_proc.process()
            self.proc_state_table.put(state, state_proc)
            self.nodes += 1

        if timer.is_over():
            raise Exception('time is over!')

        if depth == 0 or state_proc.is_terminal():
            return -1, state_proc.utility(self.color)

        # Per-state move-ordering cache: trying previously strong moves first
        # increases the number of alpha-beta cutoffs.
        moving_order = self.moving_order.get(state)
        if moving_order is None:
            moving_order = self.get_fresh_dict()

        if max_player:
            max_act = -1
            for a, _ in sorted(
                    moving_order.items(), key=lambda kv: kv[1], reverse=True):
                if len(state[a]) >= 6:  # column a is full; the move is illegal
                    continue
                next_state = env.get_next_state(state, a, self.color)
                _, val = self.alphabeta(next_state, depth - 1, alpha, beta,
                                        False, timer)
                moving_order[a] = val
                if alpha < val:
                    alpha = val
                    max_act = a
                if alpha >= beta:
                    break
            self.moving_order.put(state, moving_order)
            return max_act, alpha
        else:
            min_act = -1
            for a, _ in sorted(moving_order.items(), key=lambda kv: kv[1]):
                if len(state[a]) >= 6:  # column a is full; the move is illegal
                    continue
                next_state = env.get_next_state(
                    state, a, env.get_oponent_color(self.color))
                _, val = self.alphabeta(next_state, depth - 1, alpha, beta,
                                        True, timer)
                moving_order[a] = val
                if beta > val:
                    beta = val
                    min_act = a
                if alpha >= beta:
                    break
            self.moving_order.put(state, moving_order)
            return min_act, beta
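Example #1 assumes a Timer helper whose is_over() method reports when the per-move budget is exhausted (Example #20 constructs one as api.Timer(args['time'])). A minimal sketch of such a helper, assuming the constructor takes the budget in seconds and a monotonic clock is acceptable:

import time

class Timer:
    """Per-move wall-clock budget (sketch of the assumed interface)."""

    def __init__(self, seconds):
        self.deadline = time.monotonic() + seconds

    def is_over(self):
        return time.monotonic() >= self.deadline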
Example #2
 def minimax(self, state: TurnBasedGameState, D: int):
     if D == 0 or not state.game_state.snakes[
             self.player_index].alive or state.game_state.is_terminal_state:
         return heuristic(state.game_state, self.player_index)
     best_value = -np.inf
     worst_value = np.inf
     if state.turn == self.Turn.AGENT_TURN:
         for our_action in state.game_state.get_possible_actions(
                 player_index=self.player_index):
             h_value = self.minimax(
                 self.TurnBasedGameState(state.game_state, our_action), D)
             if h_value > best_value:
                 best_value = h_value
         return best_value
     else:
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                 state.agent_action, player_index=self.player_index):
             opponents_actions[self.player_index] = state.agent_action
             next_state = get_next_state(state.game_state,
                                         opponents_actions)
             h_value = self.minimax(
                 self.TurnBasedGameState(next_state, None), D - 1)
             if h_value < worst_value:
                 worst_value = h_value
         return worst_value
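Most of the following examples wrap the raw game state in a TurnBasedGameState that also records the agent's chosen action; the current turn is derived from whether that action has been set yet (Examples #5 and #16 branch on state.agent_action is None directly). A minimal sketch of such a wrapper; the Turn enum and the derived property are assumptions:

from enum import Enum

class Turn(Enum):
    AGENT_TURN = 0
    OPPONENTS_TURN = 1

class TurnBasedGameState:
    """Pairs a game state with the agent action chosen so far (sketch)."""

    def __init__(self, game_state, agent_action=None):
        self.game_state = game_state
        self.agent_action = agent_action

    @property
    def turn(self):
        # The agent moves first; once its action is fixed, the opponents reply.
        return Turn.AGENT_TURN if self.agent_action is None else Turn.OPPONENTS_TURN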
Example #3
    def rb_minimax(self, state: TurnBasedGameState, depth: int):
        if state.game_state.turn_number == state.game_state.game_duration_in_turns:
            if state.game_state.current_winner == self.player_index:
                return state.game_state.snakes[self.player_index].length ** 2
            else:
                return -1

        if len(state.game_state.living_agents) == 0:
            return -1

        if depth == 0:
            return heuristic(state.game_state, self.player_index)

        if state.turn == self.Turn.AGENT_TURN:
            current_max = -np.inf  # the np.NINF alias was removed in NumPy 2.0
            for action in state.game_state.get_possible_actions(self.player_index):
                value = self.rb_minimax(self.TurnBasedGameState(state.game_state, action), depth)
                current_max = max(current_max, value)
            return current_max
        else:
            current_min = np.inf  # the np.Inf alias was removed in NumPy 2.0
            for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(state.agent_action,
                                                                                              player_index=self.player_index):
                opponents_actions[self.player_index] = state.agent_action
                next_state = get_next_state(state.game_state, opponents_actions)
                value = self.rb_minimax(self.TurnBasedGameState(next_state, None), depth - 1)
                current_min = min(current_min, value)
            return current_min
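rb_minimax returns only a value, so the agent's get_action has to take the argmax over the root actions itself. A hedged sketch of such a driver on the same class; the depth constant is an assumption:

def get_action(self, state):
    best_action, best_value = None, -np.inf
    for action in state.get_possible_actions(player_index=self.player_index):
        # Fixing our action puts the wrapper in the opponents' turn.
        value = self.rb_minimax(self.TurnBasedGameState(state, action), depth=3)
        if value > best_value:
            best_value, best_action = value, action
    return best_action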
Example #4
    def get_action(self, state: GameState) -> GameAction:

        # Very similar to the greedy agent's get_action, but instead of picking the action with the highest
        # value we pick the action with the highest average value, so that on average our snake does well.
        best_actions = state.get_possible_actions(player_index=self.player_index)
        best_value = -np.inf
        for action in state.get_possible_actions(player_index=self.player_index):
            avg_value = 0
            actions_len = 0
            for opponents_actions in state.get_possible_actions_dicts_given_action(action,
                                                                                   player_index=self.player_index):
                opponents_actions[self.player_index] = action
                next_state = get_next_state(state, opponents_actions)
                h_value = _heuristic_for_tournament(next_state, self.player_index)
                avg_value += h_value
                actions_len += 1
                if len(state.opponents_alive) > 2:
                    # consider only one opponents' action dict to reduce time & memory
                    break
            avg_value /= actions_len
            # choose the action according to the average value obtained by performing it
            if avg_value > best_value:
                best_value = avg_value
                best_actions = [action]
            elif avg_value == best_value:
                best_actions.append(action)

        return np.random.choice(best_actions)
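The early break above keeps only the first opponents' action dict whenever more than two opponents are alive. The same budget cap can be written more explicitly with itertools.islice; a sketch of the inner loop under that assumption (a cap of None means no cap):

from itertools import islice

cap = 1 if len(state.opponents_alive) > 2 else None
values = []
for opponents_actions in islice(
        state.get_possible_actions_dicts_given_action(
            action, player_index=self.player_index), cap):
    opponents_actions[self.player_index] = action
    next_state = get_next_state(state, opponents_actions)
    values.append(_heuristic_for_tournament(next_state, self.player_index))
avg_value = sum(values) / len(values)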
Example #5
 def abminimax(self, state: TurnBasedGameState, D: int, alpha, beta):
     if D == 0 or not state.game_state.snakes[
             self.player_index].alive or state.game_state.is_terminal_state:
         return heuristic(state.game_state, self.player_index)
     best_value = -np.inf
     worst_value = np.inf
     if state.agent_action is None:
         for our_action in state.game_state.get_possible_actions(
                 player_index=self.player_index):
             h_value = self.abminimax(
                 self.TurnBasedGameState(state.game_state, our_action), D,
                 alpha, beta)
             best_value = max(h_value, best_value)
             alpha = max(alpha, best_value)
             if best_value >= beta:
                 return np.inf
         return best_value
     else:
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                 state.agent_action, player_index=self.player_index):
             opponents_actions[self.player_index] = state.agent_action
             next_state = get_next_state(state.game_state,
                                         opponents_actions)
             h_value = self.abminimax(
                 self.TurnBasedGameState(next_state, None), D - 1, alpha,
                 beta)
             worst_value = min(worst_value, h_value)
             beta = min(worst_value, beta)
             if worst_value <= alpha:
                 return -np.inf
         return worst_value
Example #6
 def minimax(self,
             state: MinimaxAgent.TurnBasedGameState,
             depth: int,
             alpha=-np.inf,
             beta=np.inf) -> float:
      if state.game_state.is_terminal_state or depth > 5 or len(
              state.game_state.get_possible_actions(
                  player_index=self.player_index)) == 0:
         return heuristic(state.game_state, self.player_index)
     if state.turn == self.Turn.OPPONENTS_TURN:
         curr_min = np.inf
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                 state.agent_action, player_index=self.player_index):
             opponents_actions[self.player_index] = state.agent_action
             next_state = get_next_state(state.game_state,
                                         opponents_actions)
             our_turn = self.TurnBasedGameState(next_state, None)
             curr_min = min(self.minimax(our_turn, depth + 1, alpha, beta),
                            curr_min)
             beta = min(beta, curr_min)
             if curr_min <= alpha:
                 return -np.inf
         return curr_min
     else:
         curr_max = -np.inf
         for agent_action in state.game_state.get_possible_actions(
                 player_index=self.player_index):
             opp_turn = self.TurnBasedGameState(state.game_state,
                                                agent_action)
             curr_max = max(self.minimax(opp_turn, depth, alpha, beta),
                            curr_max)
             alpha = max(curr_max, alpha)
             if curr_max >= beta:
                 return np.inf
         return curr_max
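Unlike the other examples, this search counts depth upward and cuts off once depth > 5, so the root call starts at a small depth and alpha/beta fall back to their defaults. A hedged usage sketch for picking the root action on the same class:

best_action, best_value = None, -np.inf
for action in state.get_possible_actions(player_index=self.player_index):
    # Fixing our action makes the next layer the opponents' (min) turn.
    value = self.minimax(self.TurnBasedGameState(state, action), depth=0)
    if value > best_value:
        best_value, best_action = value, action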
Example #7
 def RB_alphaBeta(self, state: MinimaxAgent.TurnBasedGameState, depth, alpha, beta):
     if state.game_state.is_terminal_state:
         return self.utility(state)
     if depth == 0:
         return heuristic(state.game_state, self.player_index)
     if state.turn == self.Turn.AGENT_TURN:
         cur_max = -np.inf
         for action in state.game_state.get_possible_actions(player_index=self.player_index):
             next_state = self.TurnBasedGameState(state.game_state, action)
             v = self.RB_alphaBeta(next_state, depth - 1, alpha, beta)
             cur_max = max(v, cur_max)
             alpha = max(cur_max, alpha)
             if cur_max >= beta:
                 return np.inf
         return cur_max
     else:
         cur_min = np.inf
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(state.agent_action,
                                                                                           player_index=self.player_index):
             opponents_actions[self.player_index] = state.agent_action
             next_state = get_next_state(state.game_state, opponents_actions)
             tb_next_state = self.TurnBasedGameState(next_state, None)
             v = self.RB_alphaBeta(tb_next_state, depth - 1, alpha, beta)
             cur_min = min(v, cur_min)
             beta = min(cur_min, beta)
             if cur_min <= alpha:
                 return -np.inf
         return cur_min
Example #8
 def get_action(self, state: GameState) -> GameAction:
      # init with all the possible actions for the case where the agent is alone; it will (possibly) be overridden later
     start = time.time()
     best_actions = state.get_possible_actions(
         player_index=self.player_index)
     best_value = -np.inf
     for action in state.get_possible_actions(
             player_index=self.player_index):
         for opponents_actions in state.get_possible_actions_dicts_given_action(
                 action, player_index=self.player_index):
             opponents_actions[self.player_index] = action
             next_state = get_next_state(state, opponents_actions)
             h_value = self._heuristic(next_state)
             if h_value > best_value:
                 best_value = h_value
                 best_actions = [action]
             elif h_value == best_value:
                 best_actions.append(action)
              if len(state.opponents_alive) > 2:
                  # consider only one opponents' action dict to reduce time & memory
                  break
     end = time.time()
      self.counter_steps += 1
      # incremental update of the running average per-move decision time
      self.avg_time = ((end - start) + self.avg_time *
                       (self.counter_steps - 1)) / self.counter_steps
     return np.random.choice(best_actions)
Example #9
 def alpha_beta_value(self, state: MinimaxAgent.TurnBasedGameState,
                      agent_to_play, depth, alpha, beta):
     if state.game_state.is_terminal_state or depth == 0:
         return heuristic(state.game_state, self.player_index)
     turn = state.turn
     if turn == agent_to_play:
         cur_max = float('-inf')
         for action in state.game_state.get_possible_actions(
                 self.player_index):
             state.agent_action = action
             v = self.alpha_beta_value(state, agent_to_play, depth, alpha,
                                       beta)
             cur_max = max(v, cur_max)
             alpha = max(cur_max, alpha)
             if cur_max >= beta:
                 return float('inf')
         return cur_max
     else:
         cur_min = float('inf')
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                 state.agent_action, self.player_index):
             opponents_actions[self.player_index] = state.agent_action
             next_state = get_next_state(state.game_state,
                                         opponents_actions)
             turn_next_state = MinimaxAgent.TurnBasedGameState(
                 next_state, None)
             v = self.alpha_beta_value(turn_next_state, agent_to_play,
                                       depth - 1, alpha, beta)
              cur_min = min(v, cur_min)
              beta = min(cur_min, beta)  # tighten beta for the remaining children
             if cur_min <= alpha:
                 return float('-inf')
         return cur_min
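alpha_beta_value also returns only a value, and its max branch mutates state.agent_action in place rather than building new wrappers. A sketch of a root driver that recovers the best action; the depth constant is an assumption:

def get_best_action(self, state):
    best_action, alpha = None, float('-inf')
    for action in state.get_possible_actions(player_index=self.player_index):
        tb_state = MinimaxAgent.TurnBasedGameState(state, action)
        # With the action fixed, the min (opponents) layer is evaluated next.
        value = self.alpha_beta_value(tb_state, MinimaxAgent.Turn.AGENT_TURN,
                                      depth=3, alpha=alpha, beta=float('inf'))
        if value > alpha:
            alpha, best_action = value, action
    return best_action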
Example #10
 def __RB_Minimax__(self, state: TurnBasedGameState, depth):
     if state.game_state.is_terminal_state:
         return heuristic(state.game_state, self.player_index)
      if depth == 0:
          return heuristic(state.game_state, self.player_index)
     if state.turn == self.Turn.AGENT_TURN:
         cur_max = -np.inf
         for action in state.game_state.get_possible_actions(
                 self.player_index):
             state.agent_action = action
             cur_value = self.__RB_Minimax__(state, depth)
             cur_max = max(cur_max, cur_value)
         return cur_max
     else:
         assert state.turn == self.Turn.OPPONENTS_TURN
         cur_min = np.inf
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                 state.agent_action, self.player_index):
             next_state = get_next_state(state.game_state,
                                         opponents_actions)
             next_state_with_turn = self.TurnBasedGameState(
                 next_state, None)
             cur_min = min(
                 cur_min,
                 self.__RB_Minimax__(next_state_with_turn, depth - 1))
         return cur_min
Example #11
 def get_action_wrapper(self, state: MinimaxAgent.TurnBasedGameState,
                        dep: int, alpha: float, beta: float) -> float:
     if dep == 0 or state.game_state.is_terminal_state:
         return heuristic(state.game_state, self.player_index)
     turn = state.turn
     if turn == MinimaxAgent.Turn.AGENT_TURN:
         curr_max = -np.inf
         all_actions = state.game_state.get_possible_actions(
             self.player_index)
         for action in all_actions:
             state.agent_action = action
             temp_val = self.get_action_wrapper(state, dep, alpha, beta)
             curr_max = max(curr_max, temp_val)
             alpha = max(curr_max, alpha)
             if curr_max >= beta:
                 return np.inf
         return curr_max
     else:
         assert (MinimaxAgent.Turn.OPPONENTS_TURN == turn)
         curr_min = np.inf
          for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                  state.agent_action, self.player_index):
              next_state = get_next_state(state.game_state,
                                          opponents_actions)
             next_state_with_turn = MinimaxAgent.TurnBasedGameState(
                 next_state, None)
             temp_val = self.get_action_wrapper(next_state_with_turn,
                                                dep - 1, alpha, beta)
             curr_min = min(curr_min, temp_val)
             beta = min(curr_min, beta)
             if curr_min <= alpha:
                 return -np.inf
         return curr_min
Example #12
 def _RB_minimax(self, tb_state: TurnBasedGameState, deciding_agent: Turn,
                 d: int):
     if tb_state.game_state.is_terminal_state:
         return self._utility(tb_state.game_state)
     if d == 0:
         return heuristic(tb_state.game_state, self.player_index)
     if deciding_agent == self.Turn.AGENT_TURN:
         actions = tb_state.game_state.get_possible_actions(
             player_index=self.player_index)
         cur_max = -np.inf
         for action in actions:
             tb_state.agent_action = action
             value = self._RB_minimax(tb_state, self.Turn.OPPONENTS_TURN, d)
             cur_max = max(cur_max, value)
         return cur_max
     else:
         cur_min = np.inf
         for opponents_actions in tb_state.game_state.get_possible_actions_dicts_given_action(
                 tb_state.agent_action, player_index=self.player_index):
             next_state = get_next_state(tb_state.game_state,
                                         opponents_actions)
             new_tb_state = self.TurnBasedGameState(next_state, None)
             value = self._RB_minimax(new_tb_state, self.Turn.AGENT_TURN,
                                      d - 1)
             cur_min = min(cur_min, value)
         return cur_min
Example #13
    def get_action(self, state: GameState) -> GameAction:
        if self.is_trap(state):
            return self.trap_escape(state)
        # init with all the possible actions for the case where the agent is alone; it will (possibly) be overridden later
        best_actions = state.get_possible_actions(
            player_index=self.player_index)
        best_value = -np.inf
        for action in state.get_possible_actions(
                player_index=self.player_index):
            for opponents_actions in state.get_possible_actions_dicts_given_action(
                    action, player_index=self.player_index):
                opponents_actions[self.player_index] = action
                next_state = get_next_state(state, opponents_actions)
                h_value = self.tournament_heuristic(next_state)
                if h_value > best_value:
                    best_value = h_value
                    best_actions = [action]
                elif h_value == best_value:
                    best_actions.append(action)

        return np.random.choice(best_actions)
Example #14
 def alphabeta(self, state: MinimaxAgent.TurnBasedGameState, player_index: int, depth: int, alpha: float,
               beta: float):
      # check whether we are at a max node
     if state.turn == MinimaxAgent.Turn.AGENT_TURN:
         if state.game_state.turn_number == state.game_state.game_duration_in_turns or not state.game_state.snakes[
             player_index].alive:
             return state.game_state.snakes[player_index].length, state.agent_action
         if depth == 0:
             return heuristic(state.game_state, player_index), state.agent_action
         best_action = None
         max_value = -np.inf
         for action in state.game_state.get_possible_actions(player_index=player_index):
             turn_state = self.TurnBasedGameState(state.game_state, action)
             # passing alpha and beta to the next node
             next_state_value, _ = self.alphabeta(turn_state, player_index, depth, alpha, beta)
             if next_state_value > max_value:
                 best_action = action
                 max_value = next_state_value
             # determine alpha according to the max value we currently have
             alpha = max(max_value, alpha)
             if max_value >= beta:
                 return np.inf, best_action
         return max_value, best_action
     else:
         best_action = None
         min_value = np.inf
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(state.agent_action,
                                                                                           player_index=self.player_index):
             next_state = get_next_state(state.game_state, opponents_actions)
             turn_state = self.TurnBasedGameState(next_state, None)
             # passing alpha and beta to the next node
             next_state_value, action = self.alphabeta(turn_state, player_index, depth - 1, alpha, beta)
             if next_state_value < min_value:
                 best_action = action
                 min_value = next_state_value
             # determine beta according to the min value we currently have
             beta = min(min_value, beta)
             if min_value <= alpha:
                 return -np.inf, best_action
         return min_value, best_action
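Because this variant threads the chosen action back up through the (value, action) return pair, the driver reduces to a single call. A minimal usage sketch; the depth constant is an assumption:

def get_action(self, state):
    _, best_action = self.alphabeta(
        self.TurnBasedGameState(state, None), self.player_index,
        depth=3, alpha=-np.inf, beta=np.inf)
    return best_action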
Example #15
 def RB_minimax(self, state: TurnBasedGameState, depth):
     if state.game_state.is_terminal_state:
         return self.utility(state)
     if depth == 0:
         return heuristic(state.game_state, self.player_index)
     if state.turn == self.Turn.AGENT_TURN:
         cur_max = -np.inf
         for action in state.game_state.get_possible_actions(player_index=self.player_index):
             next_state = self.TurnBasedGameState(state.game_state, action)
             v = self.RB_minimax(next_state, depth - 1)
             cur_max = max(v, cur_max)
         return cur_max
     else:
         cur_min = np.inf
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(state.agent_action,
                                                                                           player_index=self.player_index):
             opponents_actions[self.player_index] = state.agent_action
             next_state = get_next_state(state.game_state, opponents_actions)
             tb_next_state = self.TurnBasedGameState(next_state, None)
             v = self.RB_minimax(tb_next_state, depth)
             cur_min = min(v, cur_min)
         return cur_min
Example #16
 def abminimax(self, state: TurnBasedGameState, D: int, alpha, beta):
     if D == 0 or not state.game_state.snakes[
             self.player_index].alive or state.game_state.is_terminal_state:
         return heuristic(state.game_state, self.player_index)
     best_value = -np.inf
     worst_value = np.inf
     if state.agent_action is None:
         for our_action in state.game_state.get_possible_actions(
                 player_index=self.player_index):
             h_value = self.abminimax(
                 self.TurnBasedGameState(state.game_state, our_action), D,
                 alpha, beta)
             if h_value > best_value:
                 best_value = h_value
                 alpha = max(alpha, best_value)
              if best_value >= beta:
                  return np.inf
          return best_value
     else:
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                 state.agent_action, player_index=self.player_index):
             opponents_actions[self.player_index] = state.agent_action
             next_state = get_next_state(state.game_state,
                                         opponents_actions)
             # print("entered None ")
             h_value = self.abminimax(
                 self.TurnBasedGameState(next_state, None), D - 1, alpha,
                 beta)
             if h_value < worst_value:
                 worst_value = h_value
                 beta = min(worst_value, beta)
              if worst_value <= alpha:
                  return -np.inf
         return worst_value
Example #17
 def minimax(self, state: TurnBasedGameState, depth: int) -> float:
      if state.game_state.is_terminal_state or depth > 3 or len(
              state.game_state.get_possible_actions(
                  player_index=self.player_index)) == 0:
         return heuristic(state.game_state, self.player_index)
     if state.turn == self.Turn.OPPONENTS_TURN:
         curr_min = np.inf
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                 state.agent_action, player_index=self.player_index):
             opponents_actions[self.player_index] = state.agent_action
             next_state = get_next_state(state.game_state,
                                         opponents_actions)
             our_turn = self.TurnBasedGameState(next_state, None)
             curr_min = min(self.minimax(our_turn, depth + 1), curr_min)
         return curr_min
     else:
         curr_max = -np.inf
         for agent_action in state.game_state.get_possible_actions(
                 player_index=self.player_index):
             opp_turn = self.TurnBasedGameState(state.game_state,
                                                agent_action)
             curr_max = max(self.minimax(opp_turn, depth), curr_max)
         return curr_max
Example #18
 def rb_minimax(self, state: TurnBasedGameState, player_index: int, depth: int):
      # check whether we are at a max node
      if state.turn == MinimaxAgent.Turn.AGENT_TURN:
          # if the game is over or our snake is dead, return the snake's length
          if state.game_state.turn_number == state.game_state.game_duration_in_turns or not state.game_state.snakes[
              player_index].alive:
              return state.game_state.snakes[player_index].length, state.agent_action
          # fall back to the heuristic once we reach depth 0
          if depth == 0:
              return heuristic(state.game_state, player_index), state.agent_action
         best_action = None
         max_value = -np.inf
          # iterate over our player's possible actions and return the action with the max value
          for action in state.game_state.get_possible_actions(player_index=player_index):
              # create a turn state with the action under consideration
             turn_state = self.TurnBasedGameState(state.game_state, action)
             next_state_value, _ = self.rb_minimax(turn_state, player_index, depth)
             if next_state_value > max_value:
                 best_action = action
                 max_value = next_state_value
         return max_value, best_action
     # minimum node
     else:
         best_action = None
         min_value = np.inf
          # iterate over all the opponents' possible action dicts and return the action with the minimum value
          for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(state.agent_action,
                                                                                            player_index=self.player_index):
              # build the next state
             next_state = get_next_state(state.game_state, opponents_actions)
             turn_state = self.TurnBasedGameState(next_state, None)
              # get the value of the state we built (calling minimax with depth reduced by 1)
             next_state_value, action = self.rb_minimax(turn_state, player_index, depth - 1)
             if next_state_value < min_value:
                 best_action = action
                 min_value = next_state_value
         return min_value, best_action
Example #19
 def minimax_value(self, state: TurnBasedGameState, agent_to_play, depth):
     if state.game_state.is_terminal_state or depth == 0:
         return heuristic(state.game_state, self.player_index)
     turn = state.turn
     if turn == agent_to_play:
         cur_max = float('-inf')
         for action in state.game_state.get_possible_actions(
                 self.player_index):
             state.agent_action = action
             v = self.minimax_value(state, agent_to_play, depth)
             cur_max = max(v, cur_max)
         return cur_max
     else:
         cur_min = float('inf')
         for opponents_actions in state.game_state.get_possible_actions_dicts_given_action(
                 state.agent_action, self.player_index):
             opponents_actions[self.player_index] = state.agent_action
             next_state = get_next_state(state.game_state,
                                         opponents_actions)
             turn_next_state = self.TurnBasedGameState(next_state, None)
             v = self.minimax_value(turn_next_state, agent_to_play,
                                    depth - 1)
             cur_min = min(v, cur_min)
         return cur_min
Example #20
player[env.BLACK] = algos[env.BLACK](env.BLACK)

state = env.get_initial_state()
proc = env.SimpleStateProcessor(state)
proc.process()
color = env.WHITE
while not proc.is_terminal():
    if args['print_moves']:
        os.system('clear')
        print('\n\n\n')
    env.print_state(state)
    print("\n{} ({}) IS MOVING...".format(
        'WHITE' if color == env.WHITE else 'BLACK', algos[color].__name__))
    a = player[color].get_action(state, timer=api.Timer(args['time']))
    player[env.WHITE].update_move(a)
    player[env.BLACK].update_move(a)
    state = env.get_next_state(state, a, color)
    proc = env.SimpleStateProcessor(state)
    proc.process()
    color = env.get_oponent_color(color)

if args['print_moves']:
    os.system('clear')
    print('\n\n\n')
env.print_state(state)
if proc.get_winner() == env.WHITE:
    print("WHITE ({}) WON".format(algos[env.WHITE].__name__))
elif proc.get_winner() == env.BLACK:
    print("BLACK ({}) WON".format(algos[env.BLACK].__name__))
else:
    print("DRAW")