Example #1
    def __init__(self, colour):
        """
        Initialises an MPMixPlayer agent.
        """
        self.colour = colour
        self.state = State()
        self.clock = 0
        self.counts = defaultdict(int)
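
These excerpts rely on several names defined elsewhere in the project (N_PLAYERS, PLAYER_HASH, MAX_DEPTH, MAX_EXITS, State, and the heuristics). Below is a minimal sketch of the shared imports and constants they appear to assume; the concrete values and the colour-to-index mapping are illustrative guesses, not taken from the original source.

# Sketch of the shared imports/constants the excerpts appear to assume.
# The values and the colour-to-index mapping are illustrative guesses.
from collections import defaultdict
from math import inf
from time import process_time

N_PLAYERS = 3                                    # three-player game
PLAYER_HASH = {"red": 0, "green": 1, "blue": 2}  # colour -> index into evaluation vectors
MAX_DEPTH = 3                                    # assumed search-depth cap
MAX_EXITS = 4                                    # assumed number of exits needed to win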
Example #2
def max_n(state, counts, heuristic, max_player_evals=[-inf]*N_PLAYERS, depth_left=MAX_DEPTH):
    """
    :summary: Max^n, a 3-player variant of minimax with no good pruning techniques available.
    Of limited practical use (due to the lack of pruning) and rarely used in game.
    :returns: tuple of (evaluation, action_if_any)
    """

    if not depth_left:
        evals = heuristic(state)
        return (evals, None)

    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]

    generated_actions = state.possible_actions(player)

    for action in generated_actions:
        new_state = State.apply_action(state, action)

        # Get vector of evaluations
        player_eval = max_n(new_state, counts, heuristic, max_player_evals, depth_left-1)[0]

        if player_eval[index] > max_player_evals[index]:
            max_player_evals, best_action = player_eval, action
            if new_state.hash not in counts:
                best_new_action = best_action

    return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
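
To make the evaluation-vector convention concrete, here is a self-contained toy version of the same max^n choice rule on a hand-built tree: each leaf carries one value per player, and whoever moves at a node keeps the child vector that maximises their own component. The tree and numbers are invented purely for illustration.

from math import inf

N_PLAYERS = 3

def toy_max_n(node):
    """node is either a leaf vector (one value per player) or (player_to_move, children)."""
    if not isinstance(node, tuple):
        return node                       # leaf: evaluation vector
    player, children = node
    best = [-inf] * N_PLAYERS
    for child in children:
        evals = toy_max_n(child)
        if evals[player] > best[player]:  # each player maximises their own component
            best = evals
    return best

# Player 0 to move at the root; players 1 and 2 to move below.
tree = (0, [(1, [[3, 5, 1], [2, 6, 2]]),   # player 1 picks [2, 6, 2]
            (2, [[4, 1, 7], [6, 2, 3]])])  # player 2 picks [4, 1, 7]
print(toy_max_n(tree))                     # root (player 0) picks [4, 1, 7]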
Example #3
    def greedy_action(self):
        """
        :strategy: Choose the best action without considering opponent moves.
        """
        best_eval, best_action, best_new_action = -inf, None, None

        for action in self.state.possible_actions(self.colour):
            new_state = State.apply_action(self.state, action)
            new_eval = runner(new_state)[PLAYER_HASH[self.colour]]
            if new_eval > best_eval:
                best_eval = new_eval
                best_action = action
                if new_state.hash not in self.counts:
                    best_new_action = action
        return best_new_action if best_new_action is not None else best_action
Example #4
def directed_offensive(state, counts, heuristic, max_player, target, min_eval=inf, depth_left=MAX_DEPTH):
    """
    :summary: An algorithm that aims to MINIMISE a target player's evaluation, used in a 3-player
    scenario with no good pruning techniques possible.
    :assumption: all players will wish to maximise themselves (as in a typical Max^n algorithm)
    :strategy: If we find an action that lowers the target's evaluation without making our own worse,
    it becomes our "best action".
    :returns: tuple of (evaluation, action_if_any)
    """

    if not depth_left:
        evals = heuristic(state)
        # Value should be offset by the situation the target is in
        evals[PLAYER_HASH[max_player]] -= desperation(state)[PLAYER_HASH[target]]

        return (evals, None)

    max_player_evals = [-inf]*N_PLAYERS
    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]
    target_index = PLAYER_HASH[target]

    generated_actions = state.possible_actions(player, sort=(player==max_player))
    for action in generated_actions:
        new_state = State.apply_action(state, action)

        # Get vector of evaluations
        player_eval = directed_offensive(new_state, counts, heuristic, max_player, target, min_eval, depth_left-1)[0]

        if player != max_player:
            # NOT the max_player - we assume they will want to just maximise themselves
            if player_eval[index] > max_player_evals[index]:
                max_player_evals, best_action = player_eval, action
                if new_state.hash not in counts:
                    best_new_action = best_action
        else:
            # If this new eval LOWERS our target eval and our eval is NOT WORSE, then update our path with this action
            if player_eval[target_index] < min_eval and player_eval[index] >= max_player_evals[index]:
                max_player_evals, best_action, min_eval = player_eval, action, player_eval[target_index]
                if new_state.hash not in counts:
                    best_new_action = best_action

    return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
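
The update rule at the maximising player's nodes can be shown in isolation: among candidate evaluation vectors, keep the one that lowers the target's component as long as our own component does not get worse. The helper and the vectors below are hypothetical, invented only to mirror that rule.

from math import inf

def pick_offensive(candidates, own_index, target_index):
    """Keep the candidate that minimises the target's evaluation
    without worsening our own (mirrors directed_offensive's update)."""
    best, min_target, best_own = None, inf, -inf
    for evals in candidates:
        if evals[target_index] < min_target and evals[own_index] >= best_own:
            best, min_target, best_own = evals, evals[target_index], evals[own_index]
    return best

candidates = [[5, 2, 9], [6, 1, 4], [3, 8, 2]]
# [3, 8, 2] lowers the target further but worsens our own evaluation, so it is rejected.
print(pick_offensive(candidates, own_index=0, target_index=2))  # -> [6, 1, 4]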
Example #5
def paranoid(state, counts, heuristic, max_player, alpha=-inf, beta=inf, depth_left=MAX_DEPTH, loser=False):
    """
    :summary: Paranoid search assuming a 1-vs-rest scenario.
    Used when winning or losing beyond a certain threshold.
    The implementation also uses alpha-beta pruning, assuming reasonably good move ordering.
    :returns: tuple of (evaluation, action_if_any)
    """

    if not depth_left:
        evals = heuristic(state)
        if loser:
            evals[PLAYER_HASH[max_player]] += desperation(state)[PLAYER_HASH[max_player]]
        return (evals, None)

    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]

    generated_actions = state.possible_actions(player)
    for action in generated_actions:
        new_state = State.apply_action(state, action)

        # Only want vector of evaluations
        player_eval = paranoid(new_state, counts, heuristic, max_player, alpha, beta, depth_left-1)[0]

        if (player == max_player):
            # MaximisingPlayer wants to maximise alpha
            if player_eval[index] > alpha:
                alpha, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action
        else:
            # MinimisingPlayer wants to lower beta
            if player_eval[index] < beta:
                beta, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action

        if alpha >= beta:
            return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
    return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
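
Paranoid collapses the three-player game into a two-sided one: the maximising player keeps their own component of the evaluation while every other player is assumed to minimise it, which is what makes the alpha-beta cut-offs sound. Below is a self-contained toy sketch of that two-sided reduction with scalar leaf values; the tree and values are invented, and a real game would give the opposing coalition two moves per round rather than strict alternation.

from math import inf

def toy_paranoid(node, maximising, alpha=-inf, beta=inf):
    """Two-sided alpha-beta over the max player's scalar evaluation.
    A node is a leaf value or a list of children; `maximising` is True
    when the max player moves, False for the opposing coalition."""
    if not isinstance(node, list):
        return node                       # leaf: evaluation for the max player
    for child in node:
        val = toy_paranoid(child, not maximising, alpha, beta)
        if maximising:
            alpha = max(alpha, val)
        else:
            beta = min(beta, val)
        if alpha >= beta:                 # cut-off: the remaining siblings cannot matter
            break
    return alpha if maximising else beta

tree = [[3, [12, 8]], [[2, 4], 6], [14, [5, 2]]]
print(toy_paranoid(tree, maximising=True))  # -> 5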
Example #6
def alpha_beta(state, counts, heuristic, max_player, alpha=-inf, beta=inf, depth_left=MAX_DEPTH):
    """
    :summary: Simple yet effective implementation of minimax with alpha-beta pruning.
    :returns: tuple of (evaluation, action_if_any)
    """
    if not depth_left:
        evals = heuristic(state)
        return (evals, None)

    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]

    generated_actions = state.possible_actions(player)
    for action in generated_actions:
        new_state = State.apply_action(state, action, ignore_dead=True)

        # Only want vector of evaluations
        player_eval = alpha_beta(new_state, counts, heuristic, max_player, alpha, beta, depth_left-1)[0]

        if (player == max_player):
            # MaximisingPlayer wants to maximise alpha
            if player_eval[index] > alpha:
                alpha, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action
        else:
            # MinimisingPlayer wants to lower beta
            if player_eval[index] < beta:
                beta, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action

        if alpha >= beta:
            return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
    return (player_eval, best_new_action) if best_new_action is not None else (player_eval, best_action)
Example #7
    def update(self, colour, action):
        """
        Applies a player's action to the internal state and increments the count for the resulting state.
        """
        self.state = State.apply_action(self.state, action)
        self.counts[self.state.hash] += 1
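
The counts dictionary keyed by state hash is what drives the best_new_action bookkeeping in the search functions above: when candidate actions improve the evaluation, the one leading to a state the game has not already passed through is preferred. A minimal sketch of that preference, with invented hashes and scores:

from collections import defaultdict
from math import inf

counts = defaultdict(int)
counts[0xA1] += 1                        # state hashes already seen during the game
counts[0xB2] += 1

# (action, resulting state hash, evaluation) triples, invented for illustration
candidates = [("MOVE A", 0xA1, 7), ("JUMP B", 0xC3, 8), ("MOVE C", 0xB2, 9)]

best_eval, best_action, best_new_action = -inf, None, None
for action, state_hash, evaluation in candidates:
    if evaluation > best_eval:
        best_eval, best_action = evaluation, action
        if state_hash not in counts:     # remember the best action that reaches a fresh state
            best_new_action = action

# The fresh-state action is preferred even though "MOVE C" scores slightly higher.
print(best_new_action if best_new_action is not None else best_action)  # -> JUMP B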
Example #8
class MPMixPlayer:
    MID_GAME_THRESHOLD = 0  # The first two moves for each player (four possible good moves)
    END_GAME_THRESHOLD = 99

    def __init__(self, colour):
        """
        Initialises an MPMixPlayer agent.
        """
        self.colour = colour
        self.state = State()
        self.clock = 0
        self.counts = defaultdict(int)

    def update(self, colour, action):
        """
        Applies a player's action to the internal state and increments the count for the resulting state.
        """
        self.state = State.apply_action(self.state, action)
        self.counts[self.state.hash] += 1

    def action(self):
        """
        Returns an action subject to the time constraint (55 seconds of CPU time).
        """
        if not self.state.pieces(self.colour):
            return ("PASS", None)

        if self.clock <= 45:
            start = process_time()

            if self.state.num_opponents_dead() == 1:
                action = self.run_2_player()
            elif self.state.num_opponents_dead() == 2:
                action = self.djikstra()
            elif self.start_mid_game():
                action = self.mid_game()
            else:
                action = self.early_game()
            self.clock += process_time() - start
            print(self.clock)
        else:
            action = self.greedy_action()

        return action

    def action_logic(self):
        """
        Returns an action given conditions.
        """

    def early_game(self):
        """
        :strategy: Uses the best opening moves found by the Monte Carlo method. (Booking)
        If opening move not available, run paranoid and make sure that we maintain good piece structure
        """
        return opening_moves(
            self.state, self.colour) if not False else paranoid(
                self.state, self.counts, end_game_heuristic, self.colour)[1]

    def mid_game(self):
        """
        :strategy: Runs the MP-Mix Algorithm.
        :returns: The best evaluated action. If True is returned, we are at a good point to attempt a greedy approach.
        """
        action = mp_mix(self.state,
                        self.counts,
                        end_game_heuristic,
                        defence_threshold=0,
                        offence_threshold=0)

        if action is True:  # if True then run Greedy
            return self.end_game()
        return action

    def run_2_player(self):
        """
        :strategy: Run the paranoid algorithm with a higher depth.
                   This works because, with dead players ignored, paranoid
                   reduces to plain alpha-beta.
        """
        action = mp_mix(self.state,
                        self.counts,
                        two_player_heuristics,
                        defence_threshold=0,
                        offence_threshold=0,
                        two_player=True)
        if action is False:  # If False just use Dijkstra (we are sufficiently ahead)
            action = self.djikstra(single_player=False)
        elif action is True:  # if True then run Greedy
            return self.end_game()
        return action

    def end_game(self):
        """
        :strategy: Use booking or a stronger quiescence search
        """
        return self.greedy_action()

    def djikstra(self, single_player=True):
        """
        :strategy: Once every opponent is dead, the game reduces to Part A. Literally Part A code...
        """
        global PATH
        FLAGS = ["MOVE", "JUMP", "EXIT"]

        # AKIRA - RETURN DIJKSTRA'S GIVEN A PLAYER IS STILL ALIVE
        if not single_player:
            state = dict()
            state['colour'] = self.colour

            # TODO: Calculate jump distance for each piece and then return closest pieces for exit
            n_exited = self.state.exits[self.colour]
            n = MAX_EXITS - n_exited

            alive_opponent = self.state.get_remaining_opponent()

            temp = sorted(
                [get_cubic_ordered(tup) for tup in self.state.pieces(self.colour)],
                key=lambda x: x[PLAYER_HASH[self.colour]],
                reverse=True)
            state['pieces'] = [get_axial_ordered(tup) for tup in temp[:n]]
            state['blocks'] = [get_axial_ordered(tup) for tup in temp[n:]] + self.state.pieces(alive_opponent)

            # Skip the start node and take the first actual move
            action = [node.action_made for node in part_A_search(state)][1]

            # (pos, flag, new_pos=None)
            if FLAGS[action[1]] == "EXIT":
                return (FLAGS[action[1]], action[0])
            return (FLAGS[action[1]], (action[0], action[2]))

        if not PATH:
            # Create part_A appropriate data
            state = dict()
            state['colour'] = self.colour

            # TODO: Calculate jump distance for each piece and then return closest pieces for exit
            n_exited = self.state.exits[self.colour]
            n = MAX_EXITS - n_exited

            temp = sorted(
                [get_cubic_ordered(tup) for tup in self.state.pieces(self.colour)],
                key=lambda x: x[PLAYER_HASH[self.colour]],
                reverse=True)
            state['pieces'] = [get_axial_ordered(tup) for tup in temp[:n]]
            state['blocks'] = [get_axial_ordered(tup) for tup in temp[n:]]

            PATH = [node.action_made for node in part_A_search(state)][1:]
            print(PATH)

            # (pos, flag, new_pos=None)
            PATH = [(FLAGS[x[1]], x[0]) if FLAGS[x[1]] == "EXIT" else
                    (FLAGS[x[1]], (x[0], x[2])) for x in PATH]

            # (FLAG_str: (pos1, pos2=None))
        print("* ||| GG! 1 PLAYER GAME DIJKSTRA")
        return PATH.pop(0)

    def start_mid_game(self):
        """
        Switches to the mid game once the state depth reaches MID_GAME_THRESHOLD.
        """
        if self.state.depth == self.MID_GAME_THRESHOLD:
            print(f"* ({self.colour}) is switching to midgame")
        return (self.state.depth >= self.MID_GAME_THRESHOLD)

    def start_end_game(self):
        """
        Determines when to shift strategy to the end game given deciding factors.
        TODO: Add a flag once a player has been eliminated
        """
        if self.state.depth == self.END_GAME_THRESHOLD:
            print(f"* ({self.colour}) is switching to endgame")
        return (self.state.depth >= self.END_GAME_THRESHOLD)

    def greedy_action(self):
        """
        :strategy: Choose the best action without considering opponent moves.
        """
        best_eval, best_action, best_new_action = -inf, None, None

        for action in self.state.possible_actions(self.colour):
            new_state = State.apply_action(self.state, action)
            new_eval = runner(new_state)[PLAYER_HASH[self.colour]]
            if new_eval > best_eval:
                best_eval = new_eval
                best_action = action
                if new_state.hash not in self.counts:
                    best_new_action = action
        return best_new_action if best_new_action is not None else best_action
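
For context, the class above exposes the usual player interface of __init__(colour), action(), and update(colour, action), so a referee would drive three such agents roughly as sketched below. The colour names and turn order are assumptions for illustration, and the fragment relies on MPMixPlayer and its dependencies being importable.

# Hypothetical driver loop; colours and turn order are assumptions.
colours = ("red", "green", "blue")
players = {c: MPMixPlayer(c) for c in colours}

for turn in range(6):                    # a short illustrative run, not a full game
    mover = colours[turn % 3]
    chosen = players[mover].action()     # ask the player whose turn it is for a move
    for p in players.values():           # every agent observes every move played
        p.update(mover, chosen)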