Example #1
    def integrateObservation(self, obs):
        # When learning online (not in batch mode), finish the pending
        # transition as soon as the next observation arrives: either update
        # the weights immediately, or stash the (obs, action, reward, obs')
        # tuple if the learner also needs the next action.
        if self.learning and not self.learner.batchMode and self.lastobs is not None:
            if self.learner.passNextAction:
                self._oaro = [self.lastobs, self.lastaction, self.lastreward, obs]
            else:
                self.learner._updateWeights(self.lastobs, self.lastaction, self.lastreward, obs)
        LoggingAgent.integrateObservation(self, obs)
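For context, these overrides plug into the usual observe/act/reward cycle. A minimal driver sketch (the task object and the step count are placeholders, not part of the snippet above):

    # Hypothetical episode loop around one of these agents.
    agent.newEpisode()
    for _ in range(100):
        obs = task.getObservation()
        agent.integrateObservation(obs)  # triggers the online update above
        action = agent.getAction()
        task.performAction(action)
        agent.giveReward(task.getReward())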
Example #2
    def getAction(self):
        # get best action for every state observation
        # overlay all action values for every state observation, pick best
        LoggingAgent.getAction(self)

        # for each color, get best action, then pick highest-value action
        # among those actions
        actions = []
        values = []
        # TODO: why are same values printed many times in a row here?
        #print '========== in agent =========='
        #print 'states:', [[i] for i in self.lastobs.flatten()]
        for state in self.lastobs:
            #print 'state:', state
            actions.append(self.module.activate(state))
            values.append(self.module.lastMaxActionValue)
            #self.module.printState(state)
            #print ' best:', actions[-1], 'value:', values[-1]
        # Break ties randomly among the highest-valued actions; `values` must
        # be a numpy array for the == comparison to broadcast (a plain list
        # compared to a scalar is just False). Assumes module-level imports:
        # from numpy import array, where; from random import choice.
        actionIdx = where(array(values) == max(values))[0]
        ch = choice(actionIdx)
        self.lastaction = actions[ch]
        self.bestState = self.lastobs[ch]

        #print 'assigning reward to state', self.bestState
        #print 'chosen action:', self.lastaction, 'value:', max(values)
        # add a chance to pick a random other action
        if self.learning:
            self.lastaction = self.learner.explore(self.lastobs, self.lastaction)

        #print 'after explorer:', self.lastaction
        #print '============= end ============'
        return self.lastaction
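The tie-breaking idiom used above, in isolation (a minimal, self-contained sketch):

    from numpy import array, where
    from random import choice

    values = [0.3, 0.9, 0.9, 0.1]
    best = where(array(values) == max(values))[0]  # indices of maxima: [1, 2]
    print(choice(best))                            # 1 or 2, chosen uniformly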
Example #4
    def reset(self):
        LoggingAgent.reset(self)
        self._temperature = self.init_temperature
        self._expl_proportion = self.init_exploration
        self.learner.reset()
        self._oaro = None
        self.newEpisode()
Example #5
    def getAction(self):
        """ Activate the module with the last observation, add the exploration from
            the explorer object and store the result as last action. """
        LoggingAgent.getAction(self)

        self.lastaction = self.module.activate(self.lastobs)

        if self.learning:
            self.lastaction = self.learner.explore(self.lastobs, self.lastaction)

        return self.lastaction
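This is the stock getAction of a PyBrain-style LearningAgent. A minimal usage sketch, assuming the standard PyBrain value-based classes (the state and action counts are illustrative):

    from pybrain.rl.learners.valuebased import ActionValueTable
    from pybrain.rl.learners import Q
    from pybrain.rl.agents import LearningAgent

    table = ActionValueTable(16, 4)    # 16 states, 4 actions (illustrative)
    table.initialize(0.0)
    agent = LearningAgent(table, Q())  # module + learner, as in Example #8
    agent.integrateObservation([0])
    print(agent.getAction())           # activation plus exploration, as above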
Example #6
    def __init__(self, learner, **kwargs):
        LoggingAgent.__init__(self, learner.num_features, learner.num_actions, **kwargs)
        self.learner = learner
        self.reset()
        self.learning = True
        self.learner.dataset = self.history
        self.visited_states_x = []
        self.visited_states_y = []
        self.qvalues = []
        self.actionvalues = []
        self.init_exploration = 1.0
Example #7
    def __init__(self, learner, sdim, adim=1, maxHistoryLength=1000,
                 batch=False):
        LoggingAgent.__init__(self, sdim, adim)
        self.learner = learner
        self.policy = self.learner.module.policy
        self.lastaction = None
        self.learning = True
        self.batch = batch

        self.currentDataIndex = 0
        self.maxHistoryLength = maxHistoryLength
Example #8
    def __init__(self, module, learner = None):
        """
        :key module: the acting module
        :key learner: the learner (optional) """

        LoggingAgent.__init__(self, module.indim, module.outdim)

        self.module = module
        self.learner = learner

        # if learner is available, tell it the module and data
        if self.learner is not None:
            self.learner.module = self.module
            self.learner.dataset = self.history

        self.learning = True
Example #9
    def getAction(self):
        """ Activate the module with the last observation, add the exploration from
            the explorer object and store the result as last action. """
        LoggingAgent.getAction(self)

        # The table or neural network returns a value per action;
        # choose the action with the highest value.
        from numpy import argmax
        tempAction = self.module.activate(self.lastobs)
        if tempAction.size > 1:
            self.lastaction = [argmax(tempAction)]
        else:
            # original behaviour (still used for things like table lookup),
            # where the module returns the chosen action directly
            self.lastaction = tempAction

        if self.learning:
            self.lastaction = self.learner.explore(self.lastobs, self.lastaction)

        return self.lastaction
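A note on the branch above: when the module outputs one value per action (an ActionValueNetwork-style module), argmax selects the greedy action; when it outputs a single value (a table-style module that already returns the chosen action), the fallback branch keeps the original behaviour. The module-type names here are an inference from the code's comments, not stated in the source.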
Example #10
    def getAction(self):
        # get best action for every state observation
        # overlay all action values for every state observation, pick best
        LoggingAgent.getAction(self)

        # for each color, get best action, then pick highest-value action
        # among those actions
        actions = []
        values = []
        num_colors = len(self.lastobs[0])
        # TODO: why are same values printed many times in a row here? episodes
        #print '========== in agent =========='
        #print 'states:', self.lastobs
        for board_loc in self.lastobs:
            for color_state in board_loc:
                #print 'state:', color_state
                actions.append(self.module.activate(color_state))
                values.append(self.module.lastMaxActionValue)
                #self.module.printState(state)
                #print ' best:', actions[-1], 'value:', values[-1]

                # add a chance to pick a random other action
                if self.learning:
                    actions[-1] = self.learner.explore(color_state,
                                                       actions[-1])

        # Same tie-breaking fix as in Example #2: wrap `values` in a numpy
        # array so the == comparison broadcasts across the list of values.
        actionIdx = where(array(values) == max(values))[0]
        ch = choice(actionIdx)
        self.lastaction = [actions[ch], ch]
        loc, color = divmod(ch, num_colors)
        self.bestState = self.lastobs[loc][color]

        #print 'assigning reward to state', self.bestState
        #print 'chosen action:', self.lastaction, 'value:', max(values)

        #print '============= end ============'
        return self.lastaction
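The divmod bookkeeping above, in isolation: with num_colors values per board location, a flat index ch maps back to its (location, color) pair (a minimal sketch):

    num_colors = 3
    for ch in range(6):
        loc, color = divmod(ch, num_colors)
        print(ch, '->', (loc, color))  # 0 -> (0, 0), 1 -> (0, 1), ... 3 -> (1, 0)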
Example #12
    def integrateObservation(self, obs):
        LoggingAgent.integrateObservation(self, obs)
Example #13
    def __init__(self, learner, **kwargs):
        LoggingAgent.__init__(self, 2, 1, **kwargs)
        self.learner = learner
        #self.reset()
        self.learning = True
        self.learner.dataset = self.history
Example #14
    def __init__(self, learner, **kwargs):
        LoggingAgent.__init__(self, learner.num_features, 1, **kwargs)
        self.learner = learner
        self.learner._behaviorPolicy = self._actionProbs
        self.reset()
Example #15
    def getAction(self):
        """This is basically the Actor part."""
        LoggingAgent.getAction(self)
        self.lastaction = self.policy.activate(self.lastobs)
        return self.lastaction
Example #16
    def reset(self):
        """ Clear the history of the agent and reset the module and learner. """
        LoggingAgent.reset(self)
        self.module.reset()
        if self.learning:
            self.learner.reset()
Example #17
    def reset(self):
        LoggingAgent.reset(self)  # clear dataset sequences
        self.learner.reset()
        #print('Dict', self.learner.ret_dict())
        self.newEpisode()
Example #18
    def reset(self):
        LoggingAgent.reset(self)  # clear dataset sequences
        self.learner.reset()
        self.newEpisode()