def compute_entropy(logits):
    """
    Compute a per-row entropy from unnormalized action scores.

    Each row of ``logits`` is passed through ``log_softmax`` (defined
    elsewhere in this file; presumably it returns normalized
    log-probabilities over the last axis), and the result is reduced as
    ``-sum(p * log p)`` along the last axis.

    :param logits: A matrix of size N * |A|
    :return: A vector of size N
    """
    log_probs = log_softmax(logits)
    probs = np.exp(log_probs)
    # Reduce over the last (action) axis so one value remains per row.
    return -np.sum(log_probs * probs, axis=-1)
def cartpole_get_logp_action(theta, ob, action):
    """
    Look up the log-softmax score of one action for a single observation.

    The logits come from ``compute_logits(theta, ob)``, are normalized with
    ``log_softmax`` (both helpers are defined elsewhere in this file), and
    the entry at index ``action`` is returned.

    :param theta: A matrix of size |A| * (|S|+1)
    :param ob: A vector of size |S|
    :param action: An integer
    :return: A scalar
    """
    logits = compute_logits(theta, ob)
    log_probs = log_softmax(logits)
    # Index out the entry belonging to the chosen action.
    return log_probs[action]