Exemplo n.º 1
0
class Learner:
    """Wrap a Program together with per-evaluation bookkeeping.

    The Learner owns a single Program, derives an action from that
    Program's register 0, and can be mutated and pickled to disk.
    """

    def __init__(self):
        """Create a Learner with a fresh Program and cleared bookkeeping."""
        self.program = Program()
        self.reset()

    def reset(self):
        """Reset the bookkeeping fields (fitness and skip counter)."""
        self.fitness = None
        self.num_skips = 0

    def act(self, state):
        """Execute the Program on `state` and return the chosen action.

        Returns:
            0 or 1, obtained by rounding the sigmoid of register 0.
        """
        # NOTE: Actions are defined very narrowly for CartPole:
        #       The value in register 0 is fed through a sigmoid
        #       and rounded to either 0 or 1, the two valid
        #       actions for the CartPole environment. This does
        #       not generalize to other environments!!
        self.program.execute(state)

        r0 = self.program.registers[0]
        action = int(round(sigmoid(r0)))

        return action

    def mutate(self):
        """Mutate the underlying Program in place."""
        self.program.mutate()

    def save(self, name):
        """Pickle this Learner to the file `name + ".agent"`.

        The file is opened in a `with` block so the handle is flushed and
        closed deterministically, even if pickling raises.
        """
        with open(name + ".agent", 'wb') as agent_file:
            pickle.dump(self, agent_file)
Exemplo n.º 2
0
class Learner:
    """A Learner is a light wrapper around a Program with just enough added
    functionality to facilitate evolution (ie. learning) and bidding.
    Learners must maintain some extra concepts that Programs don't need,
    described below.

    Bidding:
    Learners must be able to bid, given a current environment state, (ie. the
    input). The bid is simply the floating point value found in the Learner's
    Program's R[0] after execution.

    Actions:
    After winning a bid, a Learner must execute some
    action. This can either be atomic, ie. simply an integer indexing into the
    action space available in the environment, or non-atomic, in which case
    the action is a pointer into another Team of Learners.

    Teams:
    As inconvenient as it is from a programming point of view, Learners need
    extensive understanding and knowledge of the Teams of which they are members.
    """

    MAX_ACTION_RANGE = 18

    # __init__ draws random atomic actions using this name, which was
    # previously undefined (AttributeError). It is kept as an alias so that
    # both spellings resolve to the same value.
    ATOMIC_ACTION_RANGE = MAX_ACTION_RANGE

    def __init__(self, action = None, learner = None):
        """Initialize new Learner. This can either be done from scratch or
        as a copy of a previous Learner, maintaining that Learner's action,
        which may be a pointer to a Team.

        Be wary of using deepcopy! The temptation to copy Learners via deepcopy
        was there, but this is a mistake since it will create copies of any Team
        pointed to by self.action. On the other hand, copying Programs via
        deepcopy is correct and a convenient way to ensure that the new Program
        gets its own copy of the list of instructions.

        Args:
            action: Atomic (int) action or Team to point to. Only used when
                `learner` is None. If both are None, a warning is printed
                and a random atomic action is assigned.
            learner: Existing Learner to copy the Program and action from.
        """

        # Set default value. This is so that setAction() will function properly
        # when checking the current self.action type. In general, the action
        # should always be set via setAction()
        self.action = 0

        # This counter keeps track of how many Teams hold a pointer to
        # this Learner, ie. how many Teams this Learner is a member of.
        self.num_referencing_teams = 0

        if learner is None:
            # Create Program associated with Learner
            self.program = Program()

            # Resolve the fallback BEFORE calling setAction(): passing None
            # through would fail in int(None) and never reach the warning.
            if action is None:
                print("WARNING - Learner::init - No Learner and no Action")
                # NOTE(review): randint() includes its upper bound, so this
                # can yield ATOMIC_ACTION_RANGE itself - confirm that value
                # is a valid index into the environment's action space.
                action = randint(0, Learner.ATOMIC_ACTION_RANGE)

            self.setAction(action)

        else:
            # Make a copy of the other Learner's Program
            self.program = deepcopy(learner.program)

            # Copy the other Learner's action, whether it's atomic or not.
            # setAction() already increments the referencing-Learner count
            # when the action is a Team, so it must not be incremented again
            # here (doing so would count this Learner twice).
            self.setAction(learner.action)


    def incrementNumReferencingTeams(self):
        """Record that one more Team holds a pointer to this Learner."""
        self.num_referencing_teams += 1


    def decrementNumReferencingTeams(self):
        """Record that one fewer Team holds a pointer to this Learner."""
        self.num_referencing_teams -= 1


    def getNumReferencingTeams(self):
        """Return the number of Teams currently referencing this Learner."""
        return self.num_referencing_teams


    def isActionAtomic(self):
        """Return True if the action is an int, False if it is a Team.

        Any other action type prints a warning and is reported as
        non-atomic (False).
        """
        # Imported at call time rather than at module level (presumably to
        # avoid a circular import between Learner and Team - verify).
        from Team import isTeam

        if isTeam(self.action):
            return False
        elif isinstance(self.action, int):
            return True
        else:
            print("WARNING - Learner::isActionAtomic - Action is not Team or int")
            print("          type(self.action) =", type(self.action))
            return False


    def bid(self, input):
        """Submit a bid to have this Learner's action taken.

        Executes the Program on `input` and returns the value left in
        register 0.
        """
        self.program.execute(input)
        return self.program.registers[0]


    def act(self, input, visited):
        """Perform action. If the action is atomic (ie. an integer) then return
        the Learner's integer action. If the action is not atomic (ie. it is
        a pointer to a Team), then call that Team's act() method, forwarding
        `input` and `visited` unchanged.
        """
        if self.isActionAtomic():
            return self.action
        else:
            return self.action.act(input, visited)


    def setAction(self, new_action):
        """Assign an action to this Learner.

        Args:
            new_action: A Team, or a value convertible with int() that is
                stored as the atomic action.
        """

        # Perform necessary bookkeeping given the action being relinquished.
        # If the current action is a Team pointer, decrement that Team's
        # referencing Learner count.
        if not self.isActionAtomic():
            self.action.decrementNumReferencingLearners()

        # If new action is atomic, simply set the action to new_action. Otherwise
        # perform bookkeeping on the new Team being pointed to before assigning
        # it to this Learner's action.
        from Team import isTeam
        if isTeam(new_action):
            new_action.incrementNumReferencingLearners()
            self.action = new_action
        else:
            self.action = int(new_action)


    def mutateProgram(self):
        """Mutate this Learner's Program."""
        self.program.mutate()