Exemplo n.º 1
0
 def computeValueFromQValues(self, state):
     """
       Return the value of `state`: the largest Q(state, action) taken
       over the legal actions.

       state: the state to evaluate; it is only handed on to
              self.getLegalActions and self.getQValue.

       Returns 0.0 when there are no legal actions, which is the case
       at the terminal state; otherwise returns the maximum of
       self.getQValue(state, action) over the legal actions.
     """
     # Resolve getLegalActions through self instead of naming the base
     # class, so an override on the agent's own class is honoured.
     legalActions = self.getLegalActions(state)
     if not legalActions:
         return 0.0
     # The builtin max replaces a manual running maximum that rebound
     # the name `max` inside this method.
     return max(self.getQValue(state, action) for action in legalActions)
Exemplo n.º 2
0
 def computeActionFromQValues(self, state):
     """
       Return the legal action with the highest Q(state, action).

       state: the state to act in; it is only handed on to
              self.getLegalActions and self.getQValue.

       Returns None when there are no legal actions, which is the case
       at the terminal state.  When several actions share the highest
       Q-value, the one that comes first among the legal actions is
       returned.
     """
     # Resolve getLegalActions through self instead of naming the base
     # class, so an override on the agent's own class is honoured.
     legalActions = self.getLegalActions(state)
     if not legalActions:
         return None

     def qValue(action):
         return self.getQValue(state, action)

     # max() keeps the first of equally good actions and always yields
     # one of them, so a state that has legal actions never produces
     # None -- not even when every Q-value is -inf.
     return max(legalActions, key=qValue)
Exemplo n.º 3
0
    def getAction(self, state):
        """
          Choose the action to take in `state`.

          util.flipCoin(self.epsilon) decides between the two branches:
          when it comes up true a legal action is drawn with
          random.choice, otherwise the action returned by
          self.computeActionFromQValues(state) is taken.  The intent is
          to act randomly with probability self.epsilon and follow the
          best policy action the rest of the time.

          Returns None when there are no legal actions, which is the
          case at the terminal state; no coin is flipped in that case.
        """
        # Resolve getLegalActions through self instead of naming the base
        # class, so an override on the agent's own class is honoured.
        legalActions = self.getLegalActions(state)
        if not legalActions:
            return None
        # Explore: take a random legal action.
        if util.flipCoin(self.epsilon):
            return random.choice(legalActions)
        # Exploit: take the best action according to the Q-values.
        return self.computeActionFromQValues(state)