Example #1
0
 def _setup(self, conditions):
     """Validate that the environment is discrete and build the Q-table.

     Delegates common setup to the Agent base class, then requires both
     discrete states and discrete actions before allocating the tabular
     estimator sized to the state/action space.
     """
     Agent._setup(self, conditions)
     is_discrete = self.conditions['discreteStates'] and self.conditions['discreteActions']
     if not is_discrete:
         raise AgentException('QAgent expects discrete states and actions. Use adapter or a different environment.')
     self.estimator = TableEstimator(self.conditions['stateNum'], self.conditions['actionNum'])
Example #2
0
class QAgent(Agent):
    """Tabular Q-learning agent for discrete state and action spaces.

    Stores Q-values in a TableEstimator and updates them episode-by-episode
    with the standard one-step Q-learning rule.
    """

    # learning rate: step size of each Q-value update
    alpha = 0.5
    # discount factor: weight of future reward in the bootstrap target
    gamma = 0.9

    def _setup(self, conditions):
        """ if agent is discrete in states and actions create Q-Table. """
        Agent._setup(self, conditions)
        if not (self.conditions['discreteStates'] and self.conditions['discreteActions']):
            raise AgentException('QAgent expects discrete states and actions. Use adapter or a different environment.')

        self.estimator = TableEstimator(self.conditions['stateNum'], self.conditions['actionNum'])

    def _calculate(self):
        # Greedy policy: pick the action with the highest Q-value for the
        # current state, as stored in the table estimator.
        self.action = self.estimator.getBestAction(self.state)

    def learn(self):
        """ go through whole episode and make Q-value updates. """
        for episode in self.history:
            for state, action, reward, nextstate in episode:
                state = int(state)
                action = int(action)

                qvalue = self.estimator.getValue(state, action)
                # FIX: identity check against the None singleton (PEP 8),
                # instead of `!= None`, which invokes __ne__ and can
                # misbehave for array-like state representations.
                if nextstate is not None:
                    nextstate = int(nextstate)
                    maxnext = self.estimator.getValue(nextstate, self.estimator.getBestAction(nextstate))
                else:
                    # Terminal transition: no successor, bootstrap with 0.
                    maxnext = 0.

                # One-step Q-learning update:
                #   Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
                self.estimator.updateValue(state, action, qvalue + self.alpha * (reward + self.gamma * maxnext - qvalue))