def _getMaxActionValue(self, state):
    """Find the action id with the largest Q-value for ``state``.

    Each action id is converted to an action via ``Action.IdToAction``
    before its value is looked up with ``self._getQValue``.

    Returns:
        A ``(actionId, value)`` tuple. On ties the lowest action id wins,
        because only a strictly greater value replaces the current best.
    """
    bestId = 0
    bestValue = self._getQValue(state, Action.IdToAction(bestId, state))

    # Action 0 seeds the search, so scanning starts at id 1.
    for candidateId in range(1, Action.getNumActions()):
        candidate = Action.IdToAction(candidateId, state)
        candidateValue = self._getQValue(state, candidate)
        if candidateValue > bestValue:
            bestId, bestValue = candidateId, candidateValue

    return bestId, bestValue
def loadQValues(self, filepath):
    """Overwrite the Q-table with values read from a text file.

    The file is read one line per entry, states in the outer loop and
    actions in the inner loop, which is the order ``saveQValues`` writes.

    Args:
        filepath: Path of the file to read.

    Raises:
        ValueError: If a line cannot be parsed as a float. This includes
            a file with too few lines, since ``readline`` then returns an
            empty string.
    """
    # The context manager guarantees the file is closed even when a line
    # fails to parse part-way through the table.
    with open(filepath, 'r') as loadFile:
        n = State.getNumStates()
        m = Action.getNumActions()
        for i in range(n):
            for j in range(m):
                self.qValues[i][j] = float(loadFile.readline())
def saveQValues(self, filepath):
    """Write the Q-table to a text file, one value per line.

    Entries are written with states in the outer loop and actions in the
    inner loop, each as ``str(value)`` followed by a newline.

    Args:
        filepath: Path of the file to create or overwrite.
    """
    # The context manager closes (and flushes) the file even if indexing
    # the table or writing raises before the last entry.
    with open(filepath, 'w') as saveFile:
        n = State.getNumStates()
        m = Action.getNumActions()
        for i in range(n):
            for j in range(m):
                saveFile.write(str(self.qValues[i][j]) + "\n")
def __init__(self, initValue=0):
    """Build the Q-table with every entry set to ``initValue``.

    ``self.qValues`` becomes a list with one row per state
    (``State.getNumStates()``), each row holding one entry per action
    (``Action.getNumActions()``).

    Args:
        initValue: Value stored in every cell of the table.
    """
    self.qValues = table = []
    numStates = State.getNumStates()
    numActions = Action.getNumActions()
    for _ in range(numStates):
        # One fresh row per state so rows never share a list object.
        table.append([initValue for _ in range(numActions)])
def _getMaxActionValue(self, state):
    """Find the action id with the largest Q-value for ``state``.

    Values are looked up with ``self._getQValue(state, actionId)``.

    Returns:
        A ``(actionId, value)`` tuple. On ties the lowest action id wins,
        because only a strictly greater value replaces the current best.
    """
    numActions = Action.getNumActions()
    maxActionId = 0
    maxValue = self._getQValue(state, maxActionId)
    # Action 0 already seeds the maximum, so the scan starts at 1 rather
    # than looking its value up a second time.
    for actionId in range(1, numActions):
        currentValue = self._getQValue(state, actionId)
        if currentValue > maxValue:
            maxActionId = actionId
            maxValue = currentValue
    return maxActionId, maxValue
def getPiAction(self, state):
    """Return the action whose Q-value is highest for ``state``.

    The best action id comes from ``self._getMaxActionValue(state)``. It is
    turned into an action with ``Action.IdToAction``, using the state's
    ``convertToServoState()`` form.
    """
    bestActionId, _bestValue = self._getMaxActionValue(state)
    return Action.IdToAction(bestActionId, state.convertToServoState())