 def __init__(self, colour):
     """
     Initialises an MPMixPlayer agent.

     :param colour: the colour this agent plays; stored on the instance as given.
     """
     self.colour = colour
     # Start from a default-constructed game state.
     self.state = State()
     # Numeric counter starting at zero.
     # NOTE(review): presumably the CPU seconds spent choosing moves - confirm against the caller.
     self.clock = 0
     # Integer tally with missing keys reading as 0.
     # NOTE(review): presumably keyed by state hash - confirm against update().
     self.counts = defaultdict(int)
def max_n(state, counts, heuristic, max_player_evals=[-inf]*N_PLAYERS, depth_left=MAX_DEPTH):
    """
    :summary: Max^N. A 3 player variant of minimax with no good pruning techniques available.
    Pretty useless (due to limited pruning) and is used little in game.
    :param state: position to search from; ``state.turn`` names the player to move.
    :param counts: collection of already-seen state hashes (only membership-tested here).
    :param heuristic: callable taking a state and returning a vector of evaluations,
        indexed by ``PLAYER_HASH[player]``.
    :param max_player_evals: best evaluation vector found so far. It is only ever
        rebound in this function, never mutated, so the shared list default is not
        modified between calls.
    :param depth_left: remaining plies to search; at 0 the state is evaluated directly.
    :returns: tuple of (evaluation, action_if_any)
    """

    if not depth_left:
        evals = heuristic(state)
        return (evals, None)

    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]

    generated_actions = state.possible_actions(player)

    for action in generated_actions:
        new_state = State.apply_action(state, action)

        # Get vector of evaluations
        player_eval = max_n(new_state, counts, heuristic, max_player_evals, depth_left-1)[0]

        # The player to move keeps whichever child maximises their own component.
        if player_eval[index] > max_player_evals[index]:
            max_player_evals, best_action = player_eval, action
            # Remember the improving action separately when it leads to an unseen state.
            if new_state.hash not in counts:
                best_new_action = best_action

    # NOTE(review): the evaluation returned is that of the LAST child searched, not
    # max_player_evals, and player_eval is unbound if no actions were generated.
    # Left as found - confirm the intended value before changing it.
    chosen = best_new_action if best_new_action is not None else best_action
    return (player_eval, chosen)
    def greedy_action(self):
        """
        :strategy: Choose the best action without considering opponent moves.
        :returns: the most recent improving action whose resulting state hash is not
            yet in ``self.counts``; if there is none, the best-evaluated action
            (None when no action is available).
        """
        best_eval, best_action, best_new_action = -inf, None, None

        for action in self.state.possible_actions(self.colour):
            new_state = State.apply_action(self.state, action)
            # Only this agent's component of the evaluation vector matters here.
            new_eval = runner(new_state)[PLAYER_HASH[self.colour]]
            if new_eval > best_eval:
                best_eval = new_eval
                best_action = action
                if new_state.hash not in self.counts:
                    best_new_action = action

        # The previous test was `if not None`, which is always true, so the
        # best_action fallback could never be reached.
        return best_new_action if best_new_action is not None else best_action
def directed_offensive(state, counts, heuristic, max_player, target, min_eval=inf, depth_left=MAX_DEPTH):
    """
    :summary: An algorithm aimed to MINIMISE a target player used in a 3 player
    scenario with no good pruning techniques possible.
    :assumption: all players will wish to maximise themselves (like a typical Max^n algorithm)
    :strategy: If we find an evaluation minimising a target's evaluation,
    and it is beneficial to us, that becomes our "best action".
    :param state: position to search from; ``state.turn`` names the player to move.
    :param counts: collection of already-seen state hashes (only membership-tested here).
    :param heuristic: callable taking a state and returning a mutable vector of
        evaluations indexed by ``PLAYER_HASH[player]``.
    :param max_player: the player on whose behalf the search runs.
    :param target: the player whose evaluation ``max_player`` tries to lower.
    :param min_eval: lowest target evaluation found so far along this path.
    :param depth_left: remaining plies to search; at 0 the state is evaluated directly.
    :returns: tuple of (evaluation, action_if_any)
    """

    if not depth_left:
        evals = heuristic(state)
        # Value should be offset by the situation the target is in
        evals[PLAYER_HASH[max_player]] -= desperation(state)[PLAYER_HASH[target]]

        return (evals, None)

    max_player_evals = [-inf]*N_PLAYERS
    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]
    target_index = PLAYER_HASH[target]

    generated_actions = state.possible_actions(player, sort=(player==max_player))
    for action in generated_actions:
        new_state = State.apply_action(state, action)

        # Get vector of evaluations
        player_eval = directed_offensive(new_state, counts, heuristic, max_player, target, min_eval, depth_left-1)[0]

        if player != max_player:
            # NOT the max_player - we assume they will want to just maximise themselves
            if player_eval[index] > max_player_evals[index]:
                max_player_evals, best_action = player_eval, action
                if new_state.hash not in counts:
                    best_new_action = best_action
        else:
            # This branch had lost its `else:`; without it the target-minimising
            # test ran for the opponents and max_player never chose an action.
            # If this new eval LOWERS our target eval and our eval is NOT WORSE, then update our path with this action
            if player_eval[target_index] < min_eval and player_eval[index] >= max_player_evals[index]:
                max_player_evals, best_action, min_eval = player_eval, action, player_eval[target_index]
                if new_state.hash not in counts:
                    best_new_action = best_action

    # NOTE(review): the evaluation returned is that of the LAST child searched, not
    # max_player_evals, and player_eval is unbound if no actions were generated.
    # Left as found - confirm the intended value before changing it.
    chosen = best_new_action if best_new_action is not None else best_action
    return (player_eval, chosen)
def paranoid(state, counts, heuristic, max_player, alpha=-inf, beta=inf, depth_left=MAX_DEPTH, loser=False):
    """
    :summary: Paranoid assuming a 1 vs rest scenario.
    Used when winning / losing given a certain threshold.
    The implementation also uses alpha-beta pruning, with the assumption of good ordering.
    :param state: position to search from; ``state.turn`` names the player to move.
    :param counts: collection of already-seen state hashes (only membership-tested here).
    :param heuristic: callable taking a state and returning a mutable vector of
        evaluations indexed by ``PLAYER_HASH[player]``.
    :param max_player: the player treated as the maximiser; everyone else minimises.
    :param alpha: best value found so far for the maximiser.
    :param beta: best value found so far for the minimisers.
    :param depth_left: remaining plies to search; at 0 the state is evaluated directly.
    :param loser: when true, leaf evaluations for ``max_player`` are increased by
        that player's ``desperation`` value.
    :returns: tuple of (evaluation, action_if_any)
    """

    if not depth_left:
        evals = heuristic(state)
        if loser:
            evals[PLAYER_HASH[max_player]] += desperation(state)[PLAYER_HASH[max_player]]
        return (evals, None)

    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]

    generated_actions = state.possible_actions(player)
    for action in generated_actions:
        new_state = State.apply_action(state, action)

        # Only want vector of evaluations.
        # `loser` is now passed down: it used to be dropped here, so the leaf
        # adjustment above could only fire when the top-level call had depth 0.
        player_eval = paranoid(new_state, counts, heuristic, max_player, alpha, beta, depth_left-1, loser)[0]

        if (player == max_player):
            # MaximisingPlayer wants to maximise alpha
            if player_eval[index] > alpha:
                alpha, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action
        else:
            # This branch had lost its `else:`; without it the maximiser updated
            # both bounds at once and every search cut off after the first action.
            # MinimisingPlayer wants to worsen beta
            # NOTE(review): this compares the mover's own component (index), not
            # max_player's - confirm that is the intended paranoid value.
            if player_eval[index] < beta:
                beta, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action

        # Cut-off: the remaining siblings cannot change the outcome.
        if alpha >= beta:
            break

    # NOTE(review): returns the evaluation of the last child searched; player_eval
    # is unbound if no actions were generated. Left as found.
    chosen = best_new_action if best_new_action is not None else best_action
    return (player_eval, chosen)
def alpha_beta(state, counts, heuristic, max_player, alpha=-inf, beta=inf, depth_left=MAX_DEPTH):
    """
    :summary: Simple yet effective implementation of minimax with alpha-beta pruning.
    :param state: position to search from; ``state.turn`` names the player to move.
    :param counts: collection of already-seen state hashes (only membership-tested here).
    :param heuristic: callable taking a state and returning a vector of evaluations
        indexed by ``PLAYER_HASH[player]``.
    :param max_player: the maximising player; any other player to move minimises.
    :param alpha: best value found so far for the maximiser.
    :param beta: best value found so far for the minimiser.
    :param depth_left: remaining plies to search; at 0 the state is evaluated directly.
    :returns: tuple of (evaluation, action_if_any)
    """
    if not depth_left:
        evals = heuristic(state)
        return (evals, None)

    best_action, best_new_action = None, None
    player = state.turn
    index = PLAYER_HASH[player]

    generated_actions = state.possible_actions(player)
    for action in generated_actions:
        # ignore_dead=True is forwarded to State.apply_action as in the original call.
        new_state = State.apply_action(state, action, ignore_dead=True)

        # Only want vector of evaluations
        player_eval = alpha_beta(new_state, counts, heuristic, max_player, alpha, beta, depth_left-1)[0]

        if (player == max_player):
            # MaximisingPlayer wants to maximise alpha
            if player_eval[index] > alpha:
                alpha, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action
        else:
            # This branch had lost its `else:`; without it the maximiser updated
            # both bounds at once and every search cut off after the first action.
            # MinimisingPlayer wants to worsen beta
            # NOTE(review): this compares the mover's own component (index), not
            # max_player's - confirm that is the intended minimax value.
            if player_eval[index] < beta:
                beta, best_action = player_eval[index], action
                if new_state.hash not in counts:
                    best_new_action = best_action

        # Cut-off: the remaining siblings cannot change the outcome.
        if alpha >= beta:
            break

    # NOTE(review): returns the evaluation of the last child searched; player_eval
    # is unbound if no actions were generated. Left as found.
    chosen = best_new_action if best_new_action is not None else best_action
    return (player_eval, chosen)
 def update(self, colour, action):
     """
     Updates a players action and adds a turn count.

     :param colour: accepted but not used by this method.
     :param action: the action to apply to the tracked state.
     """
     self.state = State.apply_action(self.state, action)
     # Count one more visit to the resulting position, keyed by its hash.
     self.counts[self.state.hash] += 1
class MPMixPlayer:
    """
    Game-playing agent that tracks the current position in ``self.state`` and, on
    each call to ``action()``, dispatches to one of several strategies (opening
    moves, MP-Mix search, a two-player search, a single-player path search, or a
    greedy one-ply choice).
    """
    # Depth at which start_mid_game() begins returning True.
    # NOTE(review): with the value 0 that is true for any depth >= 0, which does not
    # match the trailing comment about the first two moves - confirm the intended value.
    MID_GAME_THRESHOLD = 0  # The first two moves for each player (four possible good moves)

    def __init__(self, colour):
        """
        Initialises an MPMixPlayer agent.

        :param colour: the colour this agent plays; stored on the instance as given.
        """
        self.colour = colour
        # Start from a default-constructed game state.
        self.state = State()
        # CPU seconds spent searching so far; action() adds to it each turn.
        self.clock = 0
        # Visit tally keyed by state hash (incremented in update()); missing keys read as 0.
        self.counts = defaultdict(int)

    def update(self, colour, action):
        """
        Updates a players action and adds a turn count.

        :param colour: accepted but not used by this method.
        :param action: the action to apply to the tracked state.
        """
        self.state = State.apply_action(self.state, action)
        # Count one more visit to the resulting position, keyed by its hash.
        self.counts[self.state.hash] += 1

    def action(self):
        """
        Returns an action given time constraints (55 seconds CPU)

        While the accumulated search time ``self.clock`` is at most 45, a strategy is
        chosen from the number of dead opponents and the game phase, and the CPU time
        it took is added to ``self.clock``. Beyond that, only the greedy one-ply
        choice is used.

        :returns: ``("PASS", None)`` when this colour has no pieces, otherwise the
            action produced by the selected strategy.
        """
        if not self.state.pieces(self.colour):
            return ("PASS", None)

        # Search budget used up: fall back to the greedy choice. This was an
        # unconditional assignment that overwrote every searched move and left
        # `action` unbound once the budget was exceeded.
        if self.clock > 45:
            return self.greedy_action()

        start = process_time()

        dead_opponents = self.state.num_opponents_dead()
        if dead_opponents == 1:
            action = self.run_2_player()
        elif dead_opponents == 2:
            action = self.djikstra()
        elif self.start_mid_game():
            action = self.mid_game()
        else:
            # This assignment used to run after mid_game() and overwrite its result.
            action = self.early_game()

        self.clock += process_time() - start
        return action

    def action_logic(self):
        """
        Returns an action given conditions.

        NOTE(review): no implementation is present in the source, so this is a
        stub that returns None.
        """

    def early_game(self):
        """
        :strategy: Uses the best opening moves found by the Monte Carlo method. (Booking)
        If opening move not available, run paranoid and make sure that we maintain good piece structure
        :returns: the booked opening move, or the action chosen by ``paranoid`` when
            ``opening_moves`` returns False.
        """
        booked = opening_moves(self.state, self.colour)
        # The previous test was `if not False`, which is always true, so the
        # paranoid fallback could never run.
        # NOTE(review): assumes opening_moves signals "no booked move" with False - confirm.
        if booked is not False:
            return booked
        return paranoid(self.state, self.counts, end_game_heuristic, self.colour)[1]

    def mid_game(self):
        :strategy: Runs the MP-Mix Algorithm.
        :returns: The best evaluated function. If True is returned, we are at a good level to attempt a greedy approach.
        action = mp_mix(self.state,

        if action == True:  # if True then run Greedy
            return self.end_game()
        return action

    def run_2_player(self):
        :strategy: Run the paranoid algorithm with a higher depth.
                   This works because paranoid defaults to alpha-beta by ignoring
                   dead players.
        action = mp_mix(self.state,
        if action is False:  # If False just use Dijkstra (we are sufficiently ahead)
            action = self.djikstra(single_player=False)
        elif action is True:  # if True then run Greedy
            return self.end_game()
        return action

    def end_game(self):
        """
        :strategy: Use booking or a stronger quiesence search
        :returns: currently just the result of ``greedy_action()``.
        """
        return self.greedy_action()

    def djikstra(self, single_player=True):
        :strategy: If everyone is dead, it becomes Part A. Literally Part A code...
        global PATH
        FLAGS = ["MOVE", "JUMP", "EXIT"]

        if not single_player:
            state = dict()
            state['colour'] = self.colour

            # TODO: Calculate jump distance for each piece and then return closest pieces for exit
            n_exited = self.state.exits[self.colour]
            n = MAX_EXITS - n_exited

            alive_opponent = self.state.get_remaining_opponent()

            temp = sorted([
                for tup in self.state.pieces(self.colour)
                          key=lambda x: x[PLAYER_HASH[self.colour]],
            state['pieces'] = [get_axial_ordered(tup) for tup in temp[:n]]
            state['blocks'] = [get_axial_ordered(tup) for tup in temp[n:]
                               ] + self.state.pieces(alive_opponent)
            action = list(map(lambda x: x.action_made, part_A_search(state)))[
                1]  # attempting the runner so take first move
            # (pos, flag, new_pos=None)

            return (FLAGS[action[1]],
                    action[0]) if FLAGS[action[1]] == "EXIT" else (
                        FLAGS[action[1]], (action[0], action[2]))

        if not bool(PATH):
            # Create part_A appropriate data
            state = dict()
            state['colour'] = self.colour

            # TODO: Calculate jump distance for each piece and then return closest pieces for exit
            n_exited = self.state.exits[self.colour]
            n = MAX_EXITS - n_exited

            temp = sorted([
                for tup in self.state.pieces(self.colour)
                          key=lambda x: x[PLAYER_HASH[self.colour]],
            state['pieces'] = [get_axial_ordered(tup) for tup in temp[:n]]
            state['blocks'] = [get_axial_ordered(tup) for tup in temp[n:]]

            PATH = list(map(lambda x: x.action_made, part_A_search(state)))[1:]

            # (pos, flag, new_pos=None)
            PATH = [(FLAGS[x[1]], x[0]) if FLAGS[x[1]] == "EXIT" else
                    (FLAGS[x[1]], (x[0], x[2])) for x in PATH]

            # (FLAG_str: (pos1, pos2=None))
        print(f"\n\t\t\t\t\t\t\t\t\t\t\t\t* ||| GG! 1 PLAYER GAME DIJKSTRA")
        return PATH.pop(0)

    def start_mid_game(self):
        """
        Reports whether the mid game has started.

        Prints a one-off notice when the state depth is exactly
        ``MID_GAME_THRESHOLD``.

        NOTE(review): the previous text said "after 3 moves per player", which does
        not match the threshold value visible on the class - confirm the intent.

        :returns: True once ``self.state.depth`` has reached ``MID_GAME_THRESHOLD``.
        """
        depth = self.state.depth
        if depth == self.MID_GAME_THRESHOLD:
            print(f"* ({self.colour}) is switching to midgame")
        return depth >= self.MID_GAME_THRESHOLD

    def start_end_game(self):
        """
        Determines when to shift strategy to the end game given deciding factors.
        TODO: Add a flag once a player has been eliminated

        Prints a one-off notice when the state depth is exactly
        ``END_GAME_THRESHOLD``.

        NOTE(review): END_GAME_THRESHOLD is not defined in the visible part of the
        class; unless it is supplied elsewhere this raises AttributeError.

        :returns: True once ``self.state.depth`` has reached ``END_GAME_THRESHOLD``.
        """
        depth = self.state.depth
        if depth == self.END_GAME_THRESHOLD:
            print(f"* ({self.colour}) is switching to endgame")
        return depth >= self.END_GAME_THRESHOLD

    def greedy_action(self):
        """
        :strategy: Choose the best action without considering opponent moves.
        :returns: the most recent improving action whose resulting state hash is not
            yet in ``self.counts``; if there is none, the best-evaluated action
            (None when no action is available).
        """
        best_eval, best_action, best_new_action = -inf, None, None

        for action in self.state.possible_actions(self.colour):
            new_state = State.apply_action(self.state, action)
            # Only this agent's component of the evaluation vector matters here.
            new_eval = runner(new_state)[PLAYER_HASH[self.colour]]
            if new_eval > best_eval:
                best_eval = new_eval
                best_action = action
                if new_state.hash not in self.counts:
                    best_new_action = action

        # The previous test was `if not None`, which is always true, so the
        # best_action fallback could never be reached.
        return best_new_action if best_new_action is not None else best_action