示例#1
0
    def get_action(self):
        """
        Compute an epsilon-greedy move from the current position.

        When ``utilities.rand_bool(self.epsilon)`` is true (presumably with
        probability ``epsilon`` -- confirm against ``utilities``), an
        exploratory action is picked from the legal actions with
        ``utilities.rand_choice``. Otherwise the action is picked, again with
        ``utilities.rand_choice``, among the legal actions whose entry in
        ``self.qValues`` is maximal.

        Returns:
            One of the actions returned by
            ``self.Maze.get_legal_dirs(self.position)``.
        """
        legal_actions = self.Maze.get_legal_dirs(self.position)
        if utilities.rand_bool(self.epsilon):
            return utilities.rand_choice(legal_actions)

        # Pair every legal action with its current Q-value.
        valued_moves = [
            (self.qValues[(self.position, action)], action)
            for action in legal_actions
        ]

        # Take the maximum over the values only. Calling max() on the
        # (value, action) tuples would fall through to comparing the actions
        # whenever two values tie, which raises TypeError for actions that do
        # not define an ordering.
        best = max(val for val, _ in valued_moves)

        # Keep every action that reaches the best value so ties are broken
        # by rand_choice rather than by list order.
        tied_moves = [move for val, move in valued_moves if val == best]
        return utilities.rand_choice(tied_moves)
示例#2
0
    def get_action(self):
        """
        Compute a move by epsilon exploration or softmax sampling.

        When ``utilities.rand_bool(self.epsilon)`` is true (presumably with
        probability ``epsilon`` -- confirm against ``utilities``), an
        exploratory action is picked from the legal actions with
        ``utilities.rand_choice``. Otherwise an action is sampled using the
        weights that ``self.softmax`` returns for the legal actions'
        entries in ``self.qValues``.

        Returns:
            One of the actions returned by
            ``self.Maze.get_legal_dirs(self.position)``, as the original
            object (not a NumPy scalar or array).
        """
        legal_actions = self.Maze.get_legal_dirs(self.position)
        if utilities.rand_bool(self.epsilon):
            return utilities.rand_choice(legal_actions)

        # NOTE(review): self.softmax is assumed to return one probability per
        # legal action, summing to 1, as np.random.choice requires for ``p``.
        boltz_values = self.softmax([self.qValues[(self.position, a)] for a in legal_actions])

        # Sample an index instead of passing the actions to np.random.choice:
        # NumPy would convert the action list to an ndarray, which fails with
        # "a must be 1-dimensional" for tuple-valued actions and otherwise
        # hands back NumPy scalars instead of the original action objects.
        chosen_index = np.random.choice(len(legal_actions), p=boltz_values)
        return legal_actions[chosen_index]