def integrateObservation(self, obs):
    """Feed a transition to the learner (unless it batches), then log *obs*.

    If the agent is learning, the learner updates online, and a complete
    (lastobs, lastaction, lastreward) triple is available, either:
      - stash the transition in ``self._oaro`` when the learner also needs
        the *next* action before it can update, or
      - update the learner's weights immediately.
    Finally delegates to ``LoggingAgent.integrateObservation`` to record
    the new observation.
    """
    if self.learning and not self.learner.batchMode and self.lastobs is not None:
        if self.learner.passNextAction:
            # learner wants the upcoming action too; hold the transition
            # until that action is chosen
            self._oaro = [self.lastobs, self.lastaction, self.lastreward, obs]
        else:
            self.learner._updateWeights(self.lastobs, self.lastaction, self.lastreward, obs)
    LoggingAgent.integrateObservation(self, obs)
def getAction(self):
    """Return the highest-value action over all state observations.

    Activates the module once per state in ``self.lastobs``, collecting the
    greedy action and its value for each, then picks — uniformly at random
    among ties — the action with the overall maximum value.  The state that
    produced the winning action is remembered in ``self.bestState`` so the
    reward can later be attributed to it.  When learning, the chosen action
    is finally passed through the learner's explorer, which may replace it
    with an exploratory action.
    """
    LoggingAgent.getAction(self)

    # For each state observation, get the module's greedy action and value.
    actions = []
    values = []
    for state in self.lastobs:
        actions.append(self.module.activate(state))
        values.append(self.module.lastMaxActionValue)

    # Tie-break uniformly among all actions at the maximum value.
    # FIX: the previous `where(values == max(values))[0]` compared a Python
    # list against a scalar, which only broadcasts element-wise when the
    # values happen to be numpy scalars; pure-Python indexing is robust
    # for any comparable value type.
    best = max(values)
    actionIdx = [i for i, v in enumerate(values) if v == best]
    ch = choice(actionIdx)
    self.lastaction = actions[ch]
    self.bestState = self.lastobs[ch]

    # Exploration: possibly replace the greedy action with a random one.
    if self.learning:
        self.lastaction = self.learner.explore(self.lastobs, self.lastaction)
    return self.lastaction
def reset(self):
    """Reset logging state, restore the initial exploration parameters,
    reset the learner, drop any pending transition, and begin a new
    episode."""
    LoggingAgent.reset(self)
    self._oaro = None
    self._temperature = self.init_temperature
    self._expl_proportion = self.init_exploration
    self.learner.reset()
    self.newEpisode()
def getAction(self):
    """Activate the module on the last observation, let the learner's
    explorer perturb the result when learning, and store/return it as the
    last action."""
    LoggingAgent.getAction(self)
    action = self.module.activate(self.lastobs)
    if self.learning:
        action = self.learner.explore(self.lastobs, action)
    self.lastaction = action
    return action
def __init__(self, learner, **kwargs):
    """Build the agent around *learner*, sizing observation/action
    dimensions from the learner and sharing the history dataset with it."""
    LoggingAgent.__init__(self, learner.num_features, learner.num_actions, **kwargs)
    self.learner = learner
    self.reset()
    # NOTE(review): reset() runs before init_exploration is assigned below;
    # if this class's reset() reads self.init_exploration it would raise --
    # confirm the intended ordering.
    self.learning = True
    self.learner.dataset = self.history
    # bookkeeping containers for visited states and recorded values
    self.visited_states_x = []
    self.visited_states_y = []
    self.qvalues = []
    self.actionvalues = []
    self.init_exploration = 1.0
def __init__(self, learner, sdim, adim=1, maxHistoryLength=1000, batch=False):
    """Wrap *learner* in an agent with state dimension *sdim* and action
    dimension *adim*; *batch* toggles batched updates and
    *maxHistoryLength* caps the stored history."""
    LoggingAgent.__init__(self, sdim, adim)
    self.learner = learner
    # convenience handle to the policy module owned by the learner
    self.policy = self.learner.module.policy
    self.lastaction = None
    self.learning = True
    self.batch = batch
    self.currentDataIndex = 0
    self.maxHistoryLength = maxHistoryLength
def __init__(self, module, learner=None):
    """Create an agent that acts through *module*.

    :key module: the acting module
    :key learner: the learner (optional); when given, it is wired to the
        module and to the agent's history dataset and learning is enabled
    """
    LoggingAgent.__init__(self, module.indim, module.outdim)
    self.module = module
    self.learner = learner
    if learner is not None:
        # hand the learner its module and its training data source
        learner.module = module
        learner.dataset = self.history
        self.learning = True
def getAction(self):
    """Activate the module with the last observation and store the result
    as the last action.

    If the module returns a vector of action values, the index of the
    highest value becomes the discrete action; a single-element output
    (e.g. a table lookup) is used directly.  When learning, the explorer
    may replace the chosen action before it is returned.
    """
    from numpy import argmax

    LoggingAgent.getAction(self)
    # FIX: activate once and reuse the output -- the previous version
    # activated the module a second time in the single-output branch and
    # imported an unused `size` name.
    output = self.module.activate(self.lastobs)
    if output.size > 1:
        # vector of action values: choose the index of the highest value
        self.lastaction = [argmax(output)]
    else:
        # single output (e.g. table lookup): use as-is
        self.lastaction = output
    if self.learning:
        self.lastaction = self.learner.explore(self.lastobs, self.lastaction)
    return self.lastaction
def getAction(self):
    """Pick the best action across all (board-location, color) states.

    Activates the module once per color-state of every board location,
    tracking each candidate action and its value.  When learning, each
    candidate is first passed through the learner's explorer.  The final
    action is the highest-valued candidate (ties broken uniformly at
    random), returned together with its flat index; the winning state is
    kept in ``self.bestState`` for later reward attribution.
    """
    LoggingAgent.getAction(self)

    actions = []
    values = []
    # assumes every board location holds the same number of color states
    # -- TODO confirm against the environment
    num_colors = len(self.lastobs[0])
    for board_loc in self.lastobs:
        for color_state in board_loc:
            actions.append(self.module.activate(color_state))
            values.append(self.module.lastMaxActionValue)
            # exploration: possibly replace this candidate with a random one
            if self.learning:
                actions[-1] = self.learner.explore(color_state, actions[-1])

    # Tie-break uniformly among candidates at the maximum value.
    # FIX: the previous `where(values == max(values))[0]` compared a Python
    # list against a scalar, which only broadcasts element-wise when the
    # values happen to be numpy scalars; pure-Python indexing is robust.
    best = max(values)
    actionIdx = [i for i, v in enumerate(values) if v == best]
    ch = choice(actionIdx)
    self.lastaction = [actions[ch], ch]
    # recover which (location, color) the flat index `ch` refers to
    loc, color = divmod(ch, num_colors)
    self.bestState = self.lastobs[loc][color]
    return self.lastaction
def integrateObservation(self, obs):
    """Log the observation; no learning update happens at this stage."""
    LoggingAgent.integrateObservation(self, obs)
def __init__(self, learner, **kwargs):
    """Build a 2-feature, 1-action agent around *learner* and share the
    history dataset with it."""
    LoggingAgent.__init__(self, 2, 1, **kwargs)
    self.learner = learner
    # NOTE(review): reset() is deliberately not called here (it was
    # commented out in the original) -- confirm callers reset explicitly.
    self.learning = True
    self.learner.dataset = self.history
def __init__(self, learner, **kwargs):
    """Create a single-action agent around *learner*, register this
    agent's action-probability function as the learner's behavior policy,
    and reset the agent."""
    LoggingAgent.__init__(self, learner.num_features, 1, **kwargs)
    self.learner = learner
    # the learner evaluates/samples actions through this agent
    self.learner._behaviorPolicy = self._actionProbs
    self.reset()
def getAction(self):
    """Actor step: activate the policy on the last observation and
    store/return the resulting action."""
    LoggingAgent.getAction(self)
    action = self.policy.activate(self.lastobs)
    self.lastaction = action
    return action
def reset(self):
    """Clear the agent's history and reset the module; when learning is
    enabled, also reset the learner."""
    LoggingAgent.reset(self)
    self.module.reset()
    if not self.learning:
        return
    self.learner.reset()
def reset(self):
    """Clear the logged dataset sequences, reset the learner, and start a
    new episode."""
    LoggingAgent.reset(self)
    self.learner.reset()
    self.newEpisode()
def reset(self):
    """Clear the logged dataset sequences, reset the learner, and begin a
    fresh episode."""
    LoggingAgent.reset(self)
    self.learner.reset()
    self.newEpisode()