Python CategoricalPd.neglogprob примеры использования

Язык программирования: Python

Пространство имен/Пакет: utils

Класс/Тип: CategoricalPd

Метод/Функция: neglogprob

Примеров на hotexamples.com: 2

Python CategoricalPd.neglogprob - 2 примера найдено. Это лучшие примеры Python кода для utils.CategoricalPd.neglogprob, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

CategoricalPd(4)

sample(4)

neglogp(2)

neglogprob(2)

Пример #1

Показать файл

class SharedMLP:
    """Feed-forward actor-critic network with a shared two-layer trunk.

    The graph consists of an observation placeholder, two 20-unit ReLU dense
    layers built under the ``"model"`` variable scope, and two linear heads
    on top of the second hidden layer: ``ap_out`` (``n_actions`` units) and
    ``vf_out`` (a single unit).

    Args:
        sess: Session-like object; only ``sess.run(fetches, feed_dict)`` is
            called on it.
        state_dim: Size of the last axis of the observation placeholder.
        n_actions: Number of units of the policy head.
        reuse: Forwarded to ``tf.variable_scope``.

    Attributes set here: ``obs_in``, ``ap_out``, ``vf_out``, ``pd``,
    ``initial_states`` (always ``None`` for this model), ``a0``, and the two
    callables ``step`` and ``value``.
    """

    def __init__(self, sess, state_dim, n_actions, reuse=False):
        # Observation input; the leading (batch) axis is left unspecified.
        self.obs_in = tf.placeholder(
            dtype=tf.float32,
            shape=[None, state_dim],
            name='obs_in',
        )

        with tf.variable_scope("model", reuse=reuse):
            # None of the layers is given an explicit name, so keep them in
            # this creation order.
            hidden = tf.layers.dense(self.obs_in, units=20, activation=tf.nn.relu)
            hidden = tf.layers.dense(hidden, units=20, activation=tf.nn.relu)

            # Two heads on the same trunk, both without an activation.
            self.ap_out = tf.layers.dense(hidden, units=n_actions, activation=None)  # policy head
            self.vf_out = tf.layers.dense(hidden, units=1, activation=None)  # value head

        # The raw policy-head output parameterises the categorical
        # distribution.
        # NOTE(review): the original notes describe ap_out as non-normalised
        # action probabilities and say sample() perturbs them with small
        # uniform noise -- confirm against CategoricalPd.
        self.pd = CategoricalPd(self.ap_out)
        sampled = self.pd.sample()
        state_value = self.vf_out[:, 0]  # drop the size-1 last axis of the value head

        # The sample itself is passed back in as the label for neglogprob.
        sampled_neglogprob = self.pd.neglogprob(sampled)

        def step(obs, dones, lstm_states):
            """Run one forward pass; ``dones`` and ``lstm_states`` are ignored.

            Returns ``(sample, value, self.initial_states, neglogprob)``.
            """
            fetches = [sampled, state_value, sampled_neglogprob]
            actions, values, neglogprobs = sess.run(fetches, {self.obs_in: obs})
            return actions, values, self.initial_states, neglogprobs

        def value(obs, dones, lstm_states):
            """Evaluate only the value tensor; ``dones`` and ``lstm_states`` are ignored."""
            feed = {self.obs_in: obs}
            return sess.run(state_value, feed)

        # No recurrent state for this model.
        self.initial_states = None
        self.a0 = sampled
        self.step = step
        self.value = value

Пример #2

Показать файл

class LSTM_CatPD:
    """Recurrent actor-critic network: dense trunk, LSTM, policy and value heads.

    Two 20-unit ReLU dense layers feed the project's ``lstm`` helper; its
    output drives two linear heads, ``ap_out`` (``n_actions`` units) and
    ``vf_out`` (a single unit). Everything trainable is created under the
    ``"model"`` variable scope.

    Args:
        sess: Session-like object; only ``sess.run(fetches, feed_dict)`` is
            called on it.
        state_dim: Size of the last axis of the observation placeholder.
        n_actions: Number of units of the policy head.
        n_steps: Accepted but not used anywhere in this constructor.
        n_lstm: Passed to ``lstm``; the state placeholder is ``n_lstm*2`` wide.
        reuse: Forwarded to ``tf.variable_scope``.

    Attributes set here: ``obs_in``, ``D``, ``LS``, ``ap_out``, ``vf_out``,
    ``pd``, ``initial_states``, ``a0``, and the callables ``step`` and
    ``value``.
    """

    def __init__(self, sess, state_dim, n_actions, n_steps, n_lstm=256, reuse=False):
        # Graph inputs: observations, episode-done flags and the recurrent
        # state (per the original notes, cell and hidden states side by side).
        self.obs_in = tf.placeholder(
            dtype=tf.float32,
            shape=[None, state_dim],
            name='obs_in',
        )
        self.D = tf.placeholder(
            dtype=tf.float32,
            shape=[None],
            name='dones',
        )
        self.LS = tf.placeholder(
            dtype=tf.float32,
            shape=[None, n_lstm*2],
            name='lstm_s',
        )

        with tf.variable_scope("model", reuse=reuse):
            # None of the dense layers is given an explicit name, so keep
            # them in this creation order.
            hidden = tf.layers.dense(self.obs_in, units=20, activation=tf.nn.relu)
            hidden = tf.layers.dense(hidden, units=20, activation=tf.nn.relu)

            # Recurrent core; yields its output and the updated state tensor.
            lstm_out, next_state = lstm(hidden, self.D, self.LS, scope='lstm', n_lstm=n_lstm)

            # Two heads on the LSTM output, both without an activation.
            self.ap_out = tf.layers.dense(lstm_out, units=n_actions, activation=None)
            self.vf_out = tf.layers.dense(lstm_out, units=1, activation=None)

        # The raw policy-head output parameterises the categorical
        # distribution.
        # NOTE(review): the original notes describe ap_out as non-normalised
        # action probabilities and say sample() perturbs them with small
        # uniform noise -- confirm against CategoricalPd.
        self.pd = CategoricalPd(self.ap_out)
        sampled = self.pd.sample()
        state_value = self.vf_out[:, 0]  # drop the size-1 last axis of the value head

        # The sample itself is passed back in as the label for neglogprob.
        sampled_neglogprob = self.pd.neglogprob(sampled)

        def step(obs, dones, lstm_states):
            """Run one forward pass.

            Returns what ``sess.run`` yields for, in order: the sample, the
            raw policy-head output, the value, the new recurrent state and
            the sample's neglogprob.
            """
            fetches = [sampled, self.ap_out, state_value, next_state, sampled_neglogprob]
            feed = {self.obs_in: obs, self.D: dones, self.LS: lstm_states}
            return sess.run(fetches, feed)

        def value(obs, dones, lstm_states):
            """Evaluate only the value tensor for the given inputs."""
            feed = {self.obs_in: obs, self.D: dones, self.LS: lstm_states}
            return sess.run(state_value, feed)

        # Starting recurrent state: a one-element list holding a float32 zero
        # vector of length n_lstm*2.
        self.initial_states = [np.zeros(shape=n_lstm*2, dtype=np.float32)]
        self.a0 = sampled
        self.step = step
        self.value = value