def getAction(self, rawState, epsilon):
        """Choose an action for ``rawState`` using an epsilon-greedy rule.

        ``Directions.STOP`` is never a candidate. When
        ``util.flipCoin(epsilon)`` is true a random legal action is returned;
        otherwise the legal actions are ranked by
        ``self.getQValue(rawState, action)`` and the best one is returned.
        The sort is stable, so the earliest action in legal-action order wins
        ties.

        Args:
            rawState: State object exposing ``getLegalActions()``.
            epsilon: Value handed to ``util.flipCoin`` to decide whether to
                explore.

        Returns:
            The selected action, or ``None`` on the greedy path when no
            action other than STOP is legal.

        Raises:
            IndexError: On the exploration path when no action other than
                STOP is legal (``random.choice`` on an empty list).
        """
        # Filter into a fresh list: this tolerates states where STOP is not
        # legal (list.remove would raise ValueError) and leaves the list
        # returned by the state untouched.
        legalActions = [action for action in rawState.getLegalActions()
                        if action != Directions.STOP]

        if util.flipCoin(epsilon):
            return random.choice(legalActions)

        # Pair each action's index with its Q-value, best first.
        ranked = sorted(
            ((Directions.getIndex(action), self.getQValue(rawState, action))
             for action in legalActions),
            key=lambda pair: pair[1],
            reverse=True)

        for index, _ in ranked:
            # Map the index back to an action and return the first legal one.
            action = Directions.fromIndex(index)
            if action in legalActions:
                return action

        return None
    def getAction(self, rawState, epsilon):
        """Choose an action for ``rawState`` using an epsilon-greedy rule.

        ``Directions.STOP`` is never a candidate. When
        ``util.flipCoin(epsilon)`` is true a random legal action is returned.
        Otherwise the features of ``rawState`` are passed to
        ``self.model.model.predict`` as a batch of one, the first output row
        is ranked from highest to lowest value, and the first index whose
        ``Directions.fromIndex`` action is legal is returned.

        Args:
            rawState: State object exposing ``getLegalActions()``.
            epsilon: Value handed to ``util.flipCoin`` to decide whether to
                explore.

        Returns:
            The selected action, or ``None`` on the greedy path when none of
            the predicted indices maps to a legal action.

        Raises:
            IndexError: On the exploration path when no action other than
                STOP is legal (``random.choice`` on an empty list).
        """
        # Filter into a fresh list: this tolerates states where STOP is not
        # legal (list.remove would raise ValueError) and leaves the list
        # returned by the state untouched.
        legalActions = [action for action in rawState.getLegalActions()
                        if action != Directions.STOP]

        # Extracted before the coin flip, as before, in case the extractor
        # keeps state between calls.
        qState = self.trainingRoom.featuresExtractor.getFeatures(
            rawState, None)

        if util.flipCoin(epsilon):
            return random.choice(legalActions)

        # Presumably one predicted value per action index -- confirm against
        # the model's output layer.
        predictions = self.model.model.predict(np.array([qState]))[0]
        ranked = sorted(enumerate(predictions),
                        key=lambda pair: pair[1],
                        reverse=True)

        for index, _ in ranked:
            action = Directions.fromIndex(index)
            if action in legalActions:
                return action

        return None
# Example no. 3
# 0
def getLegalActions(state):
    """Return a boolean mask of which indexed directions are legal.

    The list obtained from ``state.getLegalActions()`` has
    ``Directions.STOP`` removed in place when present. Entry ``i`` of the
    result (for ``i`` in 0..3) is True when ``Directions.fromIndex(i)`` is in
    that list.

    Args:
        state: State object exposing ``getLegalActions()``.

    Returns:
        A NumPy array of four booleans.
    """
    allowed = state.getLegalActions()
    if Directions.STOP in allowed:
        allowed.remove(Directions.STOP)

    mask = []
    for index in range(4):
        mask.append(Directions.fromIndex(index) in allowed)
    return np.array(mask)