class Learner:
    """Wrap a Program and derive a binary CartPole action from its register 0."""

    def __init__(self):
        # Each Learner owns its own Program instance.
        self.program = Program()

    def reset(self):
        """Clear the per-evaluation state (fitness and skip counter)."""
        self.fitness = None
        self.num_skips = 0

    def act(self, state):
        """Return the action (0 or 1) derived from the Program's register 0.

        NOTE(review): ``state`` is not used by the visible code; presumably
        the Program is executed on the state elsewhere before this is
        called -- confirm.
        """
        # NOTE: Actions are defined very narrowly for CartPole:
        #       The value in register 0 is fed through a sigmoid
        #       and rounded to either 0 or 1, the two valid
        #       actions for the CartPole environment. This does
        #       not generalize to other environments!!

        r0 = self.program.registers[0]
        action = int(round(sigmoid(r0)))

        return action

    def mutate(self):
        """Mutate this Learner (not implemented in the visible source).

        Raises:
            NotImplementedError: always. The original method had no body at
                all, which is a SyntaxError that prevents the module from
                importing.
        """
        # NOTE(review): presumably this should delegate to the Program's own
        # mutation routine -- restore the real body once it is known. Failing
        # loudly is preferred over silently skipping mutation.
        raise NotImplementedError("Learner.mutate has no implementation")

    def save(self, name):
        """Pickle this Learner to the file ``name + ".agent"``."""
        # The context manager guarantees the file is flushed and closed,
        # even if pickling raises part-way through.
        with open(name + ".agent", 'wb') as agent_file:
            pickle.dump(self, agent_file)
class Learner:
    '''A Learner is a light wrapper around a Program with just enough added
    functionality to facilitate evolution (ie. learning) and bidding.
    Learners must maintain some extra concepts that Programs don't need,
    described below.

    Learners must be able to bid, given a current environment state, (ie. the
    input). The bid is simply the floating point value found in the Learner's
    Program's R[0] after execution.

    After winning a bid, a Learner must execute some
    action. This can either be atomic, ie. simply an integer indexing into the
    action space available in the environment, or non-atomic, in which case
    the action is a pointer into another Team of Learners.

    As inconvenient as it is from a programming point of view, Learners need
    extensive understanding and knowledge of the Teams of which they are members.
    '''

    def __init__(self, action = None, learner = None):
        """Initialize new Learner. This can either be done from scratch or
        as a copy of a previous Learner, maintaining that Learner's action,
        which may be a pointer to a Team.

        Be wary of using deepcopy! The temptation to copy Learners via deepcopy
        was there, but this is a mistake since it will create copies of any Team
        pointed to by self.action. On the other hand, copying Programs via
        deepcopy is correct and a convenient way to ensure that the new Program
        gets its own copy of the list of instructions.

        Parameters:
            action:  Action for a from-scratch Learner (int or Team). Ignored
                     when `learner` is given. If both are None a warning is
                     printed and a random atomic action is chosen.
            learner: Existing Learner to copy the Program and action from.
        """

        # Set default value. This is so that setAction() will function properly
        # when checking the current self.action type. In general, the action
        # should always be set via setAction()
        self.action = 0

        # This counter keeps track of how many Teams hold a pointer to
        # this Learner, ie. how many Teams this Learner is a member of.
        self.num_referencing_teams = 0

        if learner is None:
            # Create Program associated with Learner
            self.program = Program()

            if action is None:
                print("WARNING - Learner::init - No Learner and no Action")
                # Assign Learner's action value
                # NOTE(review): Learner.ATOMIC_ACTION_RANGE is not defined in
                # the visible class body -- presumably set elsewhere; verify.
                self.setAction(randint(0, Learner.ATOMIC_ACTION_RANGE))
            else:
                # Route the supplied action through setAction() so that any
                # Team bookkeeping is performed.
                self.setAction(action)
        else:
            # Make a copy of the other Learner's Program
            self.program = deepcopy(learner.program)

            # Copy the other Learner's action, whether it's atomic or not.
            # Assigned directly (not via setAction()) because the reference
            # count is updated explicitly just below.
            self.action = learner.action

            # If new action is a Team pointer, update that Team's number of
            # referencing Learners
            if not self.isActionAtomic():
                # NOTE(review): Team method name inferred from the comment
                # above and this class's own counter naming -- confirm
                # against Team.
                self.action.incrementNumReferencingLearners()

    def incrementNumReferencingTeams(self):
        """Record that one more Team references this Learner."""
        self.num_referencing_teams += 1

    def decrementNumReferencingTeams(self):
        """Record that one fewer Team references this Learner."""
        self.num_referencing_teams -= 1

    def getNumReferencingTeams(self):
        """Return the number of Teams currently referencing this Learner."""
        return self.num_referencing_teams

    def isActionAtomic(self):
        """Return True if the action is an int, False if it is a Team.

        Any other action type prints a warning and is reported as
        non-atomic (False).
        """
        # Imported locally, presumably to avoid a circular import with
        # Team -- confirm.
        from Team import isTeam

        if isTeam(self.action):
            return False
        elif isinstance(self.action, int):
            return True
        else:
            print("WARNING - Learner::isActionAtomic - Action is not Team or int")
            print("          type(self.action) =", type(self.action))
            return False

    def bid(self, input):
        """Submit a bid to have this Learner's action taken.

        Returns the value currently held in the Program's register 0.

        NOTE(review): `input` is not used by the visible code; presumably the
        Program is executed on the input elsewhere before the bid is read --
        confirm.
        """
        return self.program.registers[0]

    def act(self, input, visited):
        """Perform action. If the action is atomic (ie. an integer) then return
        the Learner's integer action. If the action is not atomic (ie. it is
        a pointer to a Team), then call that Team's act() method.
        """
        if self.isActionAtomic():
            return self.action
        else:
            return self.action.act(input, visited)

    def setAction(self, new_action):
        """Assign an action to this Learner.

        `new_action` is either a Team or a value convertible with int().
        """

        # Perform necessary bookkeeping given the action being relinquished.
        # If the current action is a Team pointer, decrement that Team's
        # referencing Learner count.
        if not self.isActionAtomic():
            # NOTE(review): Team method name inferred from the comment above
            # -- confirm against Team.
            self.action.decrementNumReferencingLearners()

        # If new action is atomic, simply set the action to new_action. Otherwise
        # perform bookkeeping on the new Team being pointed to before assigning
        # it to this Learner's action.
        from Team import isTeam
        if isTeam(new_action):
            # NOTE(review): Team method name inferred -- confirm against Team.
            new_action.incrementNumReferencingLearners()
            self.action = new_action
        else:
            self.action = int(new_action)

    def mutateProgram(self):
        """Mutate this Learner's Program."""