def gauss_sample(gauss_params, quant_chann, use_log_scales=True):
    mean, std = mean_std_from_out_params(gauss_params, use_log_scales)
    distribution = Normal(loc=mean, scale=std)
    x = distribution.sample()
    x = tf.clip_by_value(x, -1., 1. - 2. / quant_chann)
    x_quantized = utils.cast_quantize(x, quant_chann)
    return x_quantized
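# The snippet above relies on two project-specific helpers that are not shown.
# The stand-ins below are only a sketch of what they plausibly do (the real
# mean_std_from_out_params and utils.cast_quantize live elsewhere in that
# repository); the assumed layout is that the last axis of gauss_params holds
# [mean, (log_)scale].
import tensorflow as tf


def mean_std_from_out_params(gauss_params, use_log_scales=True):
    # Split the parameter tensor into mean and scale halves (assumed layout).
    mean, scale = tf.split(gauss_params, 2, axis=-1)
    std = tf.exp(scale) if use_log_scales else scale
    return mean, std


def cast_quantize(x, quant_chann):
    # Map x in [-1, 1 - 2/quant_chann] onto integer bins 0 .. quant_chann - 1.
    return tf.cast(tf.floor((x + 1.) / 2. * quant_chann), tf.int32)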
def make_dists_and_sample(latent_sample_seq):
    # latent_sample_seq consists of means and log_stds
    latent_dim = int(latent_sample_seq.get_shape().as_list()[-1] / 2)
    latent_dists = Normal(loc=latent_sample_seq[..., :latent_dim],
                          scale=tf.exp(latent_sample_seq[..., latent_dim:]))
    latent_sample_seq = tf.squeeze(latent_dists.sample([1]))  # sample one sample from each distribution
    return latent_dists, latent_sample_seq
def _sample(self, mu, std_dev):
    """Sample from parametrized Gaussian distribution.

    :param mu: Gaussian mean.
    :param std_dev: Standard deviation of the Gaussian.
    :return: Sample z.
    """
    z_dists = Normal(loc=mu, scale=std_dev)
    z = tf.squeeze(z_dists.sample([1]))  # sample one sample from each distribution
    return z
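# For reference, the same draw written with the reparameterisation trick, which
# keeps the sample differentiable with respect to mu and std_dev (names follow
# _sample above). This is a minimal sketch, not part of the original project.
import tensorflow as tf


def sample_reparameterized(mu, std_dev):
    eps = tf.random_normal(tf.shape(mu))  # eps ~ N(0, I)
    return mu + std_dev * eps             # z ~ N(mu, std_dev^2)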
def main_pendulum(logdir, seed, n_iter, gamma, min_timesteps_per_batch,
                  initial_stepsize, desired_kl, vf_type, vf_params, animate=False):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    env = gym.make("Pendulum-v0")
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.shape[0]
    logz.configure_output_dir(logdir)
    if vf_type == 'linear':
        vf = LinearValueFunction(**vf_params)
    elif vf_type == 'nn':
        vf = NnValueFunction(ob_dim=ob_dim, **vf_params)

    ####
    # YOUR_CODE_HERE
    # batch of observations
    sy_ob_no = tf.placeholder(shape=[None, ob_dim], name="ob", dtype=tf.float32)
    # batch of actions
    sy_ac_n = tf.placeholder(shape=[None], name="ac", dtype=tf.float32)
    # batch of advantage function estimates
    sy_adv_n = tf.placeholder(shape=[None], name="adv", dtype=tf.float32)
    # 2-layer network to learn state from observation
    sy_h1 = lrelu(dense(sy_ob_no, 32, "h1", weight_init=normc_initializer(1.0)))
    sy_h2 = lrelu(dense(sy_h1, 32, "h2", weight_init=normc_initializer(1.0)))
    # Mean control output
    sy_mean_na = dense(sy_h2, ac_dim, "mean", weight_init=normc_initializer(0.1))
    # Variance
    logstd_a = tf.get_variable("logstdev", [ac_dim])
    # define action distribution
    sy_ac_distr = Normal(mu=tf.squeeze(sy_mean_na),
                         sigma=tf.exp(logstd_a),
                         validate_args=True)
    # sampled actions, used for defining the policy
    # (NOT computing the policy gradient)
    sy_sampled_ac = tf.squeeze(sy_ac_distr.sample(sample_shape=[ac_dim]))
    sy_n = tf.shape(sy_ob_no)[0]
    sy_logprob_n = sy_ac_distr.log_pdf(sy_ac_n)
    # used for computing KL and entropy, JUST FOR DIAGNOSTIC PURPOSES
    sy_oldmean_na = tf.placeholder(shape=[None, ac_dim], name='oldmean', dtype=tf.float32)
    sy_oldlogstd_a = tf.placeholder(shape=[ac_dim], name="oldlogstdev", dtype=tf.float32)
    sy_ac_olddistr = Normal(mu=tf.squeeze(sy_oldmean_na),
                            sigma=tf.exp(sy_oldlogstd_a),
                            validate_args=True)
    sy_kl = tf.reduce_mean(tf.contrib.distributions.kl(sy_ac_distr, sy_ac_olddistr))
    sy_ent = tf.reduce_mean(sy_ac_distr.entropy())
    ####

    # Loss function that we'll differentiate to get the policy gradient
    # ("surr" is for "surrogate loss")
    sy_surr = -tf.reduce_mean(sy_adv_n * sy_logprob_n)
    # Symbolic, in case you want to change the stepsize during optimization.
    # (We're not doing that currently)
    sy_stepsize = tf.placeholder(shape=[], dtype=tf.float32)
    update_op = tf.train.AdamOptimizer(sy_stepsize).minimize(sy_surr)

    sess = tf.Session()
    sess.__enter__()  # equivalent to `with sess:`
    tf.global_variables_initializer().run()  # pylint: disable=E1101

    total_timesteps = 0
    stepsize = initial_stepsize

    for i in range(n_iter):
        print("********** Iteration %i ************" % i)

        ####
        # YOUR_CODE_HERE
        # Collect paths until we have enough timesteps
        timesteps_this_batch = 0
        paths = []
        while True:
            ob = env.reset()
            terminated = False
            obs, acs, rewards = [], [], []
            animate_this_episode = (len(paths) == 0 and (i % 10 == 0) and animate)
            while True:
                if animate_this_episode:
                    env.render()
                obs.append(ob)
                ac = sess.run(sy_sampled_ac, feed_dict={sy_ob_no: ob[None]})
                acs.append(ac)
                ob, rew, done, _ = env.step([ac])
                rewards.append(rew)
                if done:
                    break
            path = {
                "observation": np.array(obs),
                "terminated": terminated,
                "reward": np.array(rewards),
                "action": np.array(acs)
            }
            paths.append(path)
            timesteps_this_batch += pathlength(path)
            if timesteps_this_batch > min_timesteps_per_batch:
                break
        total_timesteps += timesteps_this_batch

        # Estimate advantage function
        vtargs, vpreds, advs = [], [], []
        for path in paths:
            rew_t = path["reward"]
            return_t = discount(rew_t, gamma)
            vpred_t = vf.predict(path["observation"])
            adv_t = return_t - vpred_t
            advs.append(adv_t)
            vtargs.append(return_t)
            vpreds.append(vpred_t)

        # Build arrays for policy update
        ob_no = np.concatenate([path["observation"] for path in paths])
        ac_n = np.concatenate([path["action"] for path in paths])
        adv_n = np.concatenate(advs)
        standardized_adv_n = (adv_n - adv_n.mean()) / (adv_n.std() + 1e-8)
        vtarg_n = np.concatenate(vtargs)
        vpred_n = np.concatenate(vpreds)
        vf.fit(ob_no, vtarg_n)

        # Policy update
        _, oldmean_na, oldlogstdev = sess.run(
            [update_op, sy_mean_na, logstd_a],
            feed_dict={
                sy_ob_no: ob_no,
                sy_ac_n: ac_n,
                sy_adv_n: standardized_adv_n,
                sy_stepsize: stepsize
            })
        kl, ent = sess.run(
            [sy_kl, sy_ent],
            feed_dict={
                sy_ob_no: ob_no,
                sy_oldmean_na: oldmean_na,
                sy_oldlogstd_a: oldlogstdev
            })
        ####

        if kl > desired_kl * 2:
            stepsize /= 1.5
            print('stepsize -> %s' % stepsize)
        elif kl < desired_kl / 2:
            stepsize *= 1.5
            print('stepsize -> %s' % stepsize)
        else:
            print('stepsize OK')

        # Log diagnostics
        logz.log_tabular("EpRewMean", np.mean([path["reward"].sum() for path in paths]))
        logz.log_tabular("EpLenMean", np.mean([pathlength(path) for path in paths]))
        logz.log_tabular("KLOldNew", kl)
        logz.log_tabular("Entropy", ent)
        logz.log_tabular("EVBefore", explained_variance_1d(vpred_n, vtarg_n))
        logz.log_tabular("EVAfter", explained_variance_1d(vf.predict(ob_no), vtarg_n))
        logz.log_tabular("TimestepsSoFar", total_timesteps)
        # If you're overfitting, EVAfter will be way larger than EVBefore.
        # Note that we fit the value function AFTER using it to compute the
        # advantage function, to avoid introducing bias.
        logz.dump_tabular()
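# The loop above calls a discount() helper that is not shown. A common
# implementation (assumed here; the original defines it elsewhere in the
# homework code) computes discounted cumulative returns with a linear filter:
import scipy.signal


def discount(x, gamma):
    # y[t] = x[t] + gamma * x[t+1] + gamma^2 * x[t+2] + ...
    return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]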
with tf.name_scope("cost"):
    # mean_squared_error
    RSEcost = tf.reduce_mean(tf.square(y - y_mu))  # use square error for cost function

    # # negative log-likelihood (same as maximum-likelihood)
    # y_sigma = tf.sqrt(tfmixedmodel(Xtf, tf.square(std_encoder1), Ztf, tf.square(std_encoder2)))
    # NLLcost = - tf.reduce_sum(-0.5 * tf.log(2. * np.pi) - tf.log(y_sigma)
    #                           - 0.5 * tf.square((y - y_mu) / y_sigma))

    # Mean-field variational inference using the ELBO
    p_log_prob = [0.0] * n_samples
    q_log_prob = [0.0] * n_samples
    for s in range(n_samples):
        beta_tf_copy = Normal(loc=beta_mu, scale=std_encoder1)
        beta_sample = beta_tf_copy.sample()
        q_log_prob[s] += tf.reduce_sum(beta_tf.log_prob(beta_sample))
        b_tf_copy = Normal(loc=b_mu, scale=std_encoder2)
        b_sample = b_tf_copy.sample()
        q_log_prob[s] += tf.reduce_sum(b_tf.log_prob(b_sample))
        priormodel = Normal(loc=priormu, scale=priorsigma)
        y_sample = tf.matmul(Xtf, beta_sample) + tf.matmul(Ztf, b_sample)
        p_log_prob[s] += tf.reduce_sum(priormodel.log_prob(beta_sample))
        p_log_prob[s] += tf.reduce_sum(priormodel.log_prob(b_sample))
        modelcopy = Normal(loc=y_sample, scale=priorliksigma)
        p_log_prob[s] += tf.reduce_sum(modelcopy.log_prob(y))
    p_log_prob = tf.stack(p_log_prob)
    q_log_prob = tf.stack(q_log_prob)
    ELBO = -tf.reduce_mean(p_log_prob - q_log_prob)  # negative ELBO, minimised as a loss
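# Minimal standalone sketch of the same Monte Carlo ELBO estimate for a single
# Gaussian latent variable. The mixed-model pieces above (Xtf, Ztf, the encoder
# std tensors and priors) are assumed to be defined elsewhere in that script;
# here q_mu, q_sigma and y are plain tensors of matching shape.
import tensorflow as tf
from tensorflow.contrib.distributions import Normal


def mc_elbo(q_mu, q_sigma, y, lik_sigma=1.0, n_samples=5):
    prior = Normal(loc=0., scale=1.)
    q = Normal(loc=q_mu, scale=q_sigma)
    elbo_terms = []
    for _ in range(n_samples):
        z = q.sample()
        log_q = tf.reduce_sum(q.log_prob(z))       # log q(z)
        log_p = tf.reduce_sum(prior.log_prob(z))   # log p(z)
        log_lik = tf.reduce_sum(Normal(loc=z, scale=lik_sigma).log_prob(y))  # log p(y|z)
        elbo_terms.append(log_p + log_lik - log_q)
    return tf.reduce_mean(tf.stack(elbo_terms))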
def _build_ad_nn(self, tensor_io):
    from drlutils.dataflow.tensor_io import TensorIO
    assert isinstance(tensor_io, TensorIO)
    from drlutils.model.base import get_current_nn_context
    from tensorpack.tfutils.common import get_global_step_var
    global_step = get_global_step_var()
    nnc = get_current_nn_context()
    is_training = nnc.is_training
    i_state = tensor_io.getInputTensor('state')
    i_agentIdent = tensor_io.getInputTensor('agentIdent')
    i_sequenceLength = tensor_io.getInputTensor('sequenceLength')
    i_resetRNN = tensor_io.getInputTensor('resetRNN')
    l = i_state
    # l = tf.Print(l, [i_state, tf.shape(i_state)], 'State = ')
    # l = tf.Print(l, [i_agentIdent, tf.shape(i_agentIdent)], 'agentIdent = ')
    # l = tf.Print(l, [i_sequenceLength, tf.shape(i_sequenceLength)], 'SeqLen = ')
    # l = tf.Print(l, [i_resetRNN, tf.shape(i_resetRNN)], 'resetRNN = ')

    with tf.variable_scope('critic', reuse=nnc.reuse) as vs:

        def _get_cell():
            cell = tf.nn.rnn_cell.BasicLSTMCell(256)
            # if is_training:
            #     cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.9)
            return cell

        cell = tf.nn.rnn_cell.MultiRNNCell([_get_cell() for _ in range(1)])
        rnn_outputs = self._buildRNN(
            l,
            cell,
            tensor_io.batchSize,
            i_agentIdent=i_agentIdent,
            i_sequenceLength=i_sequenceLength,
            i_resetRNN=i_resetRNN,
        )
        rnn_outputs = tf.reshape(rnn_outputs, [-1, rnn_outputs.get_shape().as_list()[-1]])
        l = rnn_outputs
        from ad_cur.autodrive.model.selu import fc_selu
        for lidx in range(2):
            l = fc_selu(
                l,
                200,
                keep_prob=1.,  # we train on sensor input only, so key information must not be dropped
                is_training=is_training,
                name='fc-{}'.format(lidx))
        value = tf.layers.dense(l, 1, name='fc-value')
        value = tf.squeeze(value, [1], name="value")
        if not hasattr(self, '_weights_critic'):
            self._weights_critic = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    with tf.variable_scope('actor', reuse=nnc.reuse) as vs:
        l = tf.stop_gradient(l)
        l = tf.layers.dense(l, 128, activation=tf.nn.relu6, name='fc-actor')
        mu_steering = 0.5 * tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-steering')
        mu_accel = tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-accel')
        mus = tf.concat([mu_steering, mu_accel], axis=-1)
        # mus = tf.layers.dense(l, 2, activation=tf.nn.tanh, name='fc-mus')
        # sigmas = tf.layers.dense(l, 2, activation=tf.nn.softplus, name='fc-sigmas')
        # sigmas = tf.clip_by_value(sigmas, -0.001, 0.5)

        def saturating_sigmoid(x):
            """Saturating sigmoid: 1.2 * sigmoid(x) - 0.1 cut to [0, 1]."""
            with tf.name_scope("saturating_sigmoid", [x]):
                y = tf.sigmoid(x)
                return tf.minimum(1.0, tf.maximum(0.0, 1.2 * y - 0.1))

        sigma_steering_ = 0.1 * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering')
        sigma_accel_ = 0.25 * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel')

        if not nnc.is_evaluating:
            sigma_beta_steering = tf.get_default_graph().get_tensor_by_name('actor/sigma_beta_steering:0')
            sigma_beta_accel = tf.get_default_graph().get_tensor_by_name('actor/sigma_beta_accel:0')
            sigma_beta_steering = tf.constant(1e-4)  # note: this overrides the tensor fetched above
            # sigma_beta_steering_exp = tf.train.exponential_decay(0.3, global_step, 1000, 0.5, name='sigma/beta/steering/exp')
            # sigma_beta_accel_exp = tf.train.exponential_decay(0.5, global_step, 5000, 0.5, name='sigma/beta/accel/exp')
        else:
            sigma_beta_steering = tf.constant(1e-4)
            sigma_beta_accel = tf.constant(1e-4)
        sigma_steering = (sigma_steering_ + sigma_beta_steering)
        sigma_accel = (sigma_accel_ + sigma_beta_accel)
        sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)
        # if is_training:
        #     pass
        # # Without sigma_beta, convergence is very slow and unstable; likely reasons:
        # # 1. Exploring as widely as possible early in training keeps the network out of local optima.
        # # 2. A sigma that is too small early on makes normal_dist's log_prob too large, so gradient
        # #    updates blow up, and the network is crippled from the start and hard to recover.
        # # if is_training:
        # #     sigmas += sigma_beta_steering
        # sigma_steering = tf.clip_by_value(sigma_steering, sigma_beta_steering, 0.5)
        # sigma_accel = tf.clip_by_value(sigma_accel, sigma_beta_accel, 0.5)
        # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
        # sigmas_orig = sigmas
        # sigmas = sigmas + sigma_beta_steering
        # sigmas = tf.minimum(sigmas + 0.1, 100)
        # sigmas = tf.clip_by_value(sigmas, sigma_beta_steering, 1)
        # sigma_steering += sigma_beta_steering
        # sigma_accel += sigma_beta_accel
        # mus = tf.concat([mu_steering, mu_accel], axis=-1)

        from tensorflow.contrib.distributions import Normal
        dists = Normal(mus, sigmas + 0.01)
        policy = tf.squeeze(dists.sample([1]), [0])
        # clip to within two standard deviations
        policy = tf.clip_by_value(policy, mus - 2 * sigmas, mus + 2 * sigmas)
        if is_training:
            self._addMovingSummary(
                tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                # sigma_beta_accel,
                # sigma_beta_steering,
            )
        # actions = tf.Print(actions, [mus, sigmas, tf.concat([sigma_steering_, sigma_accel_], -1), actions],
        #                    'mu/sigma/sigma.orig/act=', summarize=4)
        if not hasattr(self, '_weights_actor'):
            self._weights_actor = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    if not is_training:
        tensor_io.setOutputTensors(policy, value, mus, sigmas)
        return

    i_actions = tensor_io.getInputTensor("action")
    # i_actions = tf.Print(i_actions, [i_actions], 'actions = ')
    i_actions = tf.reshape(i_actions, [-1] + i_actions.get_shape().as_list()[2:])
    log_probs = dists.log_prob(i_actions)
    # exp_v = tf.transpose(
    #     tf.multiply(tf.transpose(log_probs), advantage))
    # exp_v = tf.multiply(log_probs, advantage)
    i_advantage = tensor_io.getInputTensor("advantage")
    i_advantage = tf.reshape(i_advantage, [-1] + i_advantage.get_shape().as_list()[2:])
    exp_v = log_probs * tf.expand_dims(i_advantage, -1)
    entropy = dists.entropy()
    entropy_beta = tf.get_variable(
        'entropy_beta', shape=[],
        initializer=tf.constant_initializer(0.01), trainable=False)
    exp_v = entropy_beta * entropy + exp_v
    loss_policy = tf.reduce_mean(-tf.reduce_sum(exp_v, axis=-1), name='loss/policy')

    i_futurereward = tensor_io.getInputTensor("futurereward")
    i_futurereward = tf.reshape(i_futurereward, [-1] + i_futurereward.get_shape().as_list()[2:])
    loss_value = tf.reduce_mean(0.5 * tf.square(value - i_futurereward))
    loss_entropy = tf.reduce_mean(tf.reduce_sum(entropy, axis=-1), name='xentropy_loss')

    from tensorflow.contrib.layers.python.layers.regularizers import apply_regularization, l2_regularizer
    loss_l2_regularizer = apply_regularization(l2_regularizer(1e-4), self._weights_critic)
    loss_l2_regularizer = tf.identity(loss_l2_regularizer, 'loss/l2reg')
    loss_value += loss_l2_regularizer
    loss_value = tf.identity(loss_value, name='loss/value')
    # self.cost = tf.add_n([loss_policy, loss_value * 0.1, loss_l2_regularizer])

    self._addParamSummary([('.*', ['rms', 'absmax'])])
    pred_reward = tf.reduce_mean(value, name='predict_reward')
    import tensorpack.tfutils.symbolic_functions as symbf
    advantage = symbf.rms(i_advantage, name='rms_advantage')
    self._addMovingSummary(
        loss_policy,
        loss_value,
        loss_entropy,
        pred_reward,
        advantage,
        loss_l2_regularizer,
        tf.reduce_mean(policy[:, 0], name='actor/steering/mean'),
        tf.reduce_mean(policy[:, 1], name='actor/accel/mean'),
    )
    return loss_policy, loss_value
tf.set_random_seed(1)


def F(x):
    return x**2 - 2 * x + 1


def get_fitness(value):
    return -value


mean = tf.Variable(tf.constant(-30.), dtype=tf.float32)
sigma = tf.Variable(tf.constant(1.), dtype=tf.float32)
N_dist = Normal(loc=mean, scale=sigma)
make_kids = N_dist.sample([POP_SIZE])

tfkids = tf.placeholder(tf.float32, [POP_SIZE, DNA_SIZE])
tfkids_fit = tf.placeholder(tf.float32, [POP_SIZE])
loss = -tf.reduce_mean(N_dist.log_prob(tfkids) * tfkids_fit)
train_op = tf.train.GradientDescentOptimizer(LR).minimize(loss)

x = np.linspace(-70, 70, 100)
plt.plot(x, F(x))
plt.xlim(-70, 70)
plt.ylim(-100, 1000)

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
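# The snippet above only builds the graph and the plot; a training loop along
# these lines would complete the natural-evolution-strategy example. N_GENERATIONS
# is an assumed constant (POP_SIZE, DNA_SIZE and LR are assumed module-level
# constants), and the reshape assumes DNA_SIZE == 1 so the scalar samples can be
# fed into the [POP_SIZE, DNA_SIZE] placeholder.
N_GENERATIONS = 100
for _ in range(N_GENERATIONS):
    kids = sess.run(make_kids).reshape(POP_SIZE, DNA_SIZE)  # sample a population
    kids_fit = get_fitness(F(kids)).ravel()                 # higher fitness is better
    sess.run(train_op, feed_dict={tfkids: kids, tfkids_fit: kids_fit})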
def _get_NN_prediction(self, state):
    from tensorpack.tfutils import symbolic_functions
    ctx = get_current_tower_context()
    is_training = ctx.is_training
    l = state
    # l = tf.Print(l, [state], 'State = ')
    with tf.variable_scope('critic') as vs:
        from autodrive.model.selu import fc_selu
        for lidx in range(8):
            l = fc_selu(l, 200,
                        keep_prob=1.,  # we train on sensor input only, so key information must not be dropped
                        is_training=is_training,
                        name='fc-{}'.format(lidx))
        # l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc-dense')
        # for lidx, hidden_size in enumerate([300, 600]):
        #     l = tf.layers.dense(l, hidden_size, activation=tf.nn.relu, name='fc-%d' % lidx)
        value = tf.layers.dense(l, 1, name='fc-value',
                                kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))
        if not hasattr(self, '_weights_critic'):
            self._weights_critic = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    with tf.variable_scope('actor') as vs:
        l = tf.stop_gradient(l)
        mu_steering = 0.5 * tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-steering',
                                            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        mu_accel = tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-accel',
                                   kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        mus = tf.concat([mu_steering, mu_accel], axis=-1)
        # mus = tf.layers.dense(l, 2, activation=tf.nn.tanh, name='fc-mus')
        # sigmas = tf.layers.dense(l, 2, activation=tf.nn.softplus, name='fc-sigmas')
        # sigmas = tf.clip_by_value(sigmas, -0.001, 0.5)
        sigma_steering_ = 0.5 * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering',
                                                kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        sigma_accel_ = 1. * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel',
                                            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        # sigma_beta_steering = symbolic_functions.get_scalar_var('sigma_beta_steering', 0.3, summary=True, trainable=False)
        # sigma_beta_accel = symbolic_functions.get_scalar_var('sigma_beta_accel', 0.3, summary=True, trainable=False)
        from tensorpack.tfutils.common import get_global_step_var
        sigma_beta_steering_exp = tf.train.exponential_decay(
            0.001, get_global_step_var(), 1000, 0.5, name='sigma/beta/steering/exp')
        sigma_beta_accel_exp = tf.train.exponential_decay(
            0.5, get_global_step_var(), 5000, 0.5, name='sigma/beta/accel/exp')
        # sigma_steering = tf.minimum(sigma_steering_ + sigma_beta_steering, 0.5)
        # sigma_accel = tf.minimum(sigma_accel_ + sigma_beta_accel, 0.2)
        # sigma_steering = sigma_steering_
        sigma_steering = (sigma_steering_ + sigma_beta_steering_exp)
        sigma_accel = (sigma_accel_ + sigma_beta_accel_exp)  # * 0.1
        # sigma_steering = sigma_steering_
        # sigma_accel = sigma_accel_
        sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)
        # sigma_steering = tf.clip_by_value(sigma_steering, 0.1, 0.5)
        # sigma_accel = tf.clip_by_value(sigma_accel, 0.1, 0.5)
        # sigmas = sigmas_orig + 0.001
        # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
        # sigma_beta = tf.get_variable('sigma_beta', shape=[], dtype=tf.float32,
        #                              initializer=tf.constant_initializer(.5), trainable=False)
        # if is_training:
        #     pass
        # # Without sigma_beta, convergence is very slow and unstable; likely reasons:
        # # 1. Exploring as widely as possible early in training keeps the network out of local optima.
        # # 2. A sigma that is too small early on makes normal_dist's log_prob too large, so gradient
        # #    updates blow up, and the network is crippled from the start and hard to recover.
        # # if is_training:
        # #     sigmas += sigma_beta_steering
        # sigma_steering = tf.clip_by_value(sigma_steering, sigma_beta_steering, 0.5)
        # sigma_accel = tf.clip_by_value(sigma_accel, sigma_beta_accel, 0.5)
        # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
        # sigmas_orig = sigmas
        # sigmas = sigmas + sigma_beta_steering
        # sigmas = tf.minimum(sigmas + 0.1, 100)
        # sigmas = tf.clip_by_value(sigmas, sigma_beta_steering, 1)
        # sigma_steering += sigma_beta_steering
        # sigma_accel += sigma_beta_accel
        # mus = tf.concat([mu_steering, mu_accel], axis=-1)
        from tensorflow.contrib.distributions import Normal
        dists = Normal(mus, sigmas + 1e-3)
        actions = tf.squeeze(dists.sample([1]), [0])
        # clip to within one standard deviation
        # actions = tf.clip_by_value(actions, -1., 1.)
        if is_training:
            summary.add_moving_summary(
                tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                sigma_beta_accel_exp,
                sigma_beta_steering_exp,
            )
        # actions = tf.Print(actions, [mus, sigmas, tf.concat([sigma_steering_, sigma_accel_], -1), actions],
        #                    'mu/sigma/sigma.orig/act=', summarize=4)
        if not hasattr(self, '_weights_actor'):
            self._weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
    return actions, value, dists
class VariationalAutoEncoder(object):
    def __init__(self, feature_size, latent_size, hidden_sizes,
                 reconstruction_distribution=None,
                 number_of_reconstruction_classes=None,
                 use_batch_norm=True, use_count_sum=True, epsilon=1e-6):

        # Setup
        super(VariationalAutoEncoder, self).__init__()

        self.feature_size = feature_size
        self.latent_size = latent_size
        self.hidden_sizes = hidden_sizes

        self.reconstruction_distribution_name = reconstruction_distribution
        self.reconstruction_distribution = distributions[reconstruction_distribution]
        self.k_max = number_of_reconstruction_classes

        self.use_batch_norm = use_batch_norm
        self.use_count_sum = use_count_sum
        self.epsilon = epsilon

        # self.graph = tf.Graph()
        self.x = tf.placeholder(tf.float32, [None, self.feature_size], 'x')  # counts
        if self.use_count_sum:
            self.n = tf.placeholder(tf.float32, [None, 1], 'N')  # total counts sum

        self.learning_rate = tf.placeholder(tf.float32, [], 'learning_rate')
        self.warm_up_weight = tf.placeholder(tf.float32, [], 'warm_up_weight')
        self.is_training = tf.placeholder(tf.bool, [], 'phase')

        self.inference()
        self.loss()
        self.training()

        self.summary = tf.summary.merge_all()

        for parameter in tf.trainable_variables():
            print(parameter.name, parameter.get_shape())

    @property
    def name(self):
        # model_name = dataSetBaseName(splitting_method, splitting_fraction,
        #                              filtering_method, feature_selection, feature_size)
        model_name = self.reconstruction_distribution_name.replace(" ", "_")

        # if self.k_max:
        #     model_name += "_c_" + str(self.k_max)

        if self.use_count_sum:
            model_name += "_sum"

        model_name += "_l_" + str(self.latent_size) + "_h_" + "_".join(map(str, self.hidden_sizes))

        if self.use_batch_norm:
            model_name += "_bn"

        # model_name += "_lr_{:.1g}".format(self.learning_rate)
        # model_name += "_b_" + str(self.batch_size)
        # model_name += "_wu_" + str(number_of_warm_up_epochs)
        # model_name += "_e_" + str(number_of_epochs)

        return model_name

    def inference(self):
        encoder = self.x

        with tf.variable_scope("ENCODER"):
            for i, hidden_size in enumerate(self.hidden_sizes):
                encoder = dense_layer(inputs=encoder,
                                      num_outputs=hidden_size,
                                      activation_fn=relu,
                                      use_batch_norm=self.use_batch_norm,
                                      is_training=self.is_training,
                                      scope='{:d}'.format(i + 1))

        with tf.variable_scope("Z"):
            z_mu = dense_layer(inputs=encoder,
                               num_outputs=self.latent_size,
                               activation_fn=None,
                               use_batch_norm=False,
                               is_training=self.is_training,
                               scope='MU')
            z_sigma = dense_layer(inputs=encoder,
                                  num_outputs=self.latent_size,
                                  activation_fn=lambda x: tf.exp(tf.clip_by_value(x, -3, 3)),
                                  use_batch_norm=False,
                                  is_training=self.is_training,
                                  scope='SIGMA')
            self.q_z_given_x = Normal(mu=z_mu, sigma=z_sigma)

        # Mean of z
        self.z_mean = self.q_z_given_x.mean()

        # Stochastic layer
        self.z = self.q_z_given_x.sample()

        # Decoder - Generative model, p(x|z)
        if self.use_count_sum:
            decoder = tf.concat([self.z, self.n], axis=1, name='Z_N')
        else:
            decoder = self.z

        with tf.variable_scope("DECODER"):
            for i, hidden_size in enumerate(reversed(self.hidden_sizes)):
                decoder = dense_layer(inputs=decoder,
                                      num_outputs=hidden_size,
                                      activation_fn=relu,
                                      use_batch_norm=self.use_batch_norm,
                                      is_training=self.is_training,
                                      scope='{:d}'.format(len(self.hidden_sizes) - i))

        # Reconstruction distribution parameterisation
        with tf.variable_scope("X_TILDE"):
            x_theta = {}
            for parameter in self.reconstruction_distribution["parameters"]:
                parameter_activation_function = \
                    self.reconstruction_distribution["parameters"][parameter]["activation function"]
                p_min, p_max = \
                    self.reconstruction_distribution["parameters"][parameter]["support"]
                x_theta[parameter] = dense_layer(
                    inputs=decoder,
                    num_outputs=self.feature_size,
                    activation_fn=lambda x: tf.clip_by_value(
                        parameter_activation_function(x),
                        p_min + self.epsilon,
                        p_max - self.epsilon),
                    is_training=self.is_training,
                    scope=parameter.upper())

            self.p_x_given_z = self.reconstruction_distribution["class"](x_theta)

            if self.k_max:
                x_logits = dense_layer(inputs=decoder,
                                       num_outputs=self.feature_size * self.k_max,
                                       activation_fn=None,
                                       is_training=self.is_training,
                                       scope="P_K")
                x_logits = tf.reshape(x_logits, [-1, self.feature_size, self.k_max])
                self.p_x_given_z = Categorized(dist=self.p_x_given_z,
                                               cat=Categorical(logits=x_logits))

            self.x_tilde_mean = self.p_x_given_z.mean()

        # Add histogram summaries for the trainable parameters
        for parameter in tf.trainable_variables():
            tf.summary.histogram(parameter.name, parameter)

    def loss(self):
        # Recognition prior
        p_z_mu = tf.constant(0.0, dtype=tf.float32)
        p_z_sigma = tf.constant(1.0, dtype=tf.float32)
        p_z = Normal(p_z_mu, p_z_sigma)

        # Loss

        ## Reconstruction error
        log_p_x_given_z = tf.reduce_mean(
            tf.reduce_sum(self.p_x_given_z.log_prob(self.x), axis=1),
            name='reconstruction_error')
        tf.add_to_collection('losses', log_p_x_given_z)

        ## Regularisation
        KL_qp = tf.reduce_mean(
            tf.reduce_sum(kl(self.q_z_given_x, p_z), axis=1),
            name="kl_divergence")
        tf.add_to_collection('losses', KL_qp)

        # Averaging over samples.
        self.loss_op = tf.subtract(log_p_x_given_z, KL_qp, name='lower_bound')
        tf.add_to_collection('losses', self.loss_op)

        # Add scalar summaries for the losses
        for l in tf.get_collection('losses'):
            tf.summary.scalar(l.op.name, l)

    def training(self):
        # Create the gradient descent optimiser with the given learning rate.
        def setupTraining():
            # Optimizer and training objective of negative loss
            optimiser = tf.train.AdamOptimizer(self.learning_rate)

            # Create a variable to track the global step.
            self.global_step = tf.Variable(0, name='global_step', trainable=False)

            # Use the optimiser to apply the gradients that minimize the loss
            # (and also increment the global step counter) as a single training step.
            self.train_op = optimiser.minimize(-self.loss_op, global_step=self.global_step)

        # Make sure that the updates of the moving_averages in batch_norm
        # layers are performed before the train_step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            with tf.control_dependencies([updates]):
                setupTraining()
        else:
            setupTraining()

    def train(self, train_data, valid_data, number_of_epochs=50, batch_size=100,
              learning_rate=1e-3, log_directory=None, reset_training=False):

        if self.use_count_sum:
            n_train = train_data.counts.sum(axis=1).reshape(-1, 1)
            n_valid = valid_data.counts.sum(axis=1).reshape(-1, 1)

        if reset_training and os.path.exists(log_directory):
            for f in os.listdir(log_directory):
                os.remove(os.path.join(log_directory, f))
            os.rmdir(log_directory)

        # Train
        M = train_data.number_of_examples

        self.saver = tf.train.Saver()
        checkpoint_file = os.path.join(log_directory, 'model.ckpt')

        with tf.Session() as session:
            summary_writer = tf.summary.FileWriter(log_directory, session.graph)

            session.run(tf.global_variables_initializer())

            # Print out the defined graph
            # print("The inference graph:")
            # print(tf.get_default_graph().as_graph_def())

            # train_losses, valid_losses = [], []

            feed_dict_train = {self.x: train_data.counts, self.is_training: False}
            feed_dict_valid = {self.x: valid_data.counts, self.is_training: False}
            if self.use_count_sum:
                feed_dict_train[self.n] = n_train
                feed_dict_valid[self.n] = n_valid

            for epoch in range(number_of_epochs):
                shuffled_indices = numpy.random.permutation(M)

                for i in range(0, M, batch_size):
                    step = session.run(self.global_step)

                    start_time = time()

                    # Feeding in batch to model
                    subset = shuffled_indices[i:(i + batch_size)]
                    batch = train_data.counts[subset]
                    feed_dict_batch = {self.x: batch,
                                       self.is_training: True,
                                       self.learning_rate: learning_rate}

                    # Adding the sum of counts per cell to the generator after the sample layer.
                    if self.use_count_sum:
                        feed_dict_batch[self.n] = n_train[subset]

                    # Run the stochastic batch training operation.
                    _, batch_loss = session.run([self.train_op, self.loss_op],
                                                feed_dict=feed_dict_batch)

                    # Duration of one training step.
                    duration = time() - start_time

                    # Evaluation printout and TensorBoard summary
                    if step % 10 == 0:
                        print('Step {:d}: loss = {:.2f} ({:.3f} sec)'.format(
                            int(step), batch_loss, duration))
                        summary_str = session.run(self.summary, feed_dict=feed_dict_batch)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                # Saving model parameters
                print('Checkpoint reached: Saving model')
                self.saver.save(session, checkpoint_file)
                print('Done saving model')

                # Evaluation
                print('Evaluating epoch {:d}'.format(epoch))

                train_loss = 0
                for i in range(0, M, batch_size):
                    subset = slice(i, (i + batch_size))
                    batch = train_data.counts[subset]
                    feed_dict_batch = {self.x: batch, self.is_training: False}
                    if self.use_count_sum:
                        feed_dict_batch[self.n] = n_train[subset]
                    train_loss += session.run(self.loss_op, feed_dict=feed_dict_batch)
                train_loss /= M / batch_size
                print('Done evaluating training set')

                valid_loss = 0
                for i in range(0, valid_data.number_of_examples, batch_size):
                    subset = slice(i, (i + batch_size))
                    batch = valid_data.counts[subset]
                    feed_dict_batch = {self.x: batch, self.is_training: False}
                    if self.use_count_sum:
                        feed_dict_batch[self.n] = n_valid[subset]
                    valid_loss += session.run(self.loss_op, feed_dict=feed_dict_batch)
                valid_loss /= valid_data.number_of_examples / batch_size
                print('Done evaluating validation set')

                print("Epoch %d: ELBO: %g (Train), %g (Valid)"
                      % (epoch + 1, train_loss, valid_loss))

    def evaluate(self, test_set, batch_size=100, log_directory=None):
        checkpoint = tf.train.get_checkpoint_state(log_directory)

        if self.use_count_sum:
            n_test = test_set.counts.sum(axis=1).reshape(-1, 1)

        with tf.Session() as session:
            if checkpoint and checkpoint.model_checkpoint_path:
                self.saver.restore(session, checkpoint.model_checkpoint_path)

            lower_bound_test = 0
            recon_mean_test = numpy.empty(
                [test_set.number_of_examples, test_set.number_of_features])
            z_mu_test = numpy.empty([test_set.number_of_examples, self.latent_size])

            for i in range(0, test_set.number_of_examples, batch_size):
                subset = slice(i, (i + batch_size))
                batch = test_set.counts[subset]
                feed_dict_batch = {self.x: batch, self.is_training: False}
                if self.use_count_sum:
                    feed_dict_batch[self.n] = n_test[subset]

                lower_bound_batch, recon_mean_batch, z_mu_batch = session.run(
                    [self.loss_op, self.x_tilde_mean, self.z_mean],
                    feed_dict=feed_dict_batch)

                lower_bound_test += lower_bound_batch
                recon_mean_test[subset] = recon_mean_batch
                z_mu_test[subset] = z_mu_batch

            lower_bound_test /= test_set.number_of_examples / batch_size

            metrics_test = {"LL_test": lower_bound_test}
            print(metrics_test)

            return recon_mean_test, z_mu_test, metrics_test
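# Hypothetical usage sketch for the class above; "poisson", train_data and
# valid_data are placeholders and would have to match the distributions
# registry and the train()/evaluate() interface assumed by the original code.
vae = VariationalAutoEncoder(feature_size=5000,
                             latent_size=50,
                             hidden_sizes=[500, 250],
                             reconstruction_distribution="poisson")
vae.train(train_data, valid_data, number_of_epochs=50, batch_size=100,
          learning_rate=1e-3, log_directory="log/" + vae.name)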
def gaussian_layer(x, in_dim, out_dim, scope, activation_fn=tf.nn.relu,
                   reuse=False, use_mean=False, store=False, use_stored=False,
                   prior_stddev=1.0, l2_const=0.0):
    """Single layer of fully-connected units whose weights follow a unit Gaussian prior.

    Args:
        x: batch of input
        in_dim: input dimension
        out_dim: output dimension
        scope: tensorflow variable scope name
        activation_fn: activation function
        use_mean: use the mean of the approximate posterior, instead of sampling
        store: store the sampled weights for later reuse
        use_stored: use the previously stored weight sample
    Returns:
        output and KL of the weights for the layer
    """
    prior_var = prior_stddev**2
    with tf.variable_scope(scope, reuse=reuse):
        w_mean = tf.get_variable('w_mean', shape=[in_dim, out_dim], initializer=xi())
        w_row = tf.get_variable('w_row', shape=[in_dim, out_dim], initializer=ni(-3.0, 0.1))
        w_stddev = tf.nn.softplus(w_row, name='w_std') + eps

        w_dist = Normal([0.0] * in_dim * out_dim, [1.0] * in_dim * out_dim)
        w_std_sample = tf.reshape(w_dist.sample(), [in_dim, out_dim], name='w_std_sample')

        # reparametrization: w = w_mean + eps * w_stddev
        w_sample = w_mean + w_std_sample * w_stddev

        b = tf.get_variable('b', shape=[out_dim], dtype=tf.float32, initializer=xi())

        # to store the previous theta value
        w_last = tf.get_variable('w_last', initializer=tf.zeros([in_dim, out_dim]), trainable=False)

        if use_mean:
            out = activation_fn(tf.matmul(x, w_mean) + b, name='activation')
            return out, 0.0
        else:
            if store:
                store_op = tf.assign(w_last, w_sample)
                with tf.control_dependencies([store_op]):
                    out = activation_fn(tf.matmul(x, w_sample) + b, name='activation')
            else:
                if use_stored:
                    out = activation_fn(tf.matmul(x, w_last) + b, name='activation')
                else:
                    out = activation_fn(tf.matmul(x, w_sample) + b, name='activation')

            D = in_dim * out_dim
            # closed-form KL(q(w) || p(w)) between diagonal Gaussians
            kl = tf.log(prior_stddev) * D - \
                tf.reduce_sum(tf.log(w_stddev + eps)) + \
                0.5 * (-D + (tf.reduce_sum(w_stddev**2) + tf.reduce_sum(w_mean**2)) / prior_var)
            return out, kl
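# Quick NumPy check (a sketch, not part of the original layer) that the
# closed-form expression above is the standard KL divergence between diagonal
# Gaussians, KL( N(mu, sigma^2) || N(0, prior_stddev^2) ), summed over all
# in_dim * out_dim weights.
import numpy as np


def kl_closed_form(mu, sigma, prior_stddev=1.0):
    prior_var = prior_stddev ** 2
    return np.sum(np.log(prior_stddev / sigma)
                  + (sigma ** 2 + mu ** 2) / (2.0 * prior_var)
                  - 0.5)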
def fit(self, data, epochs=1000, max_seconds=600, activation=tf.nn.elu,
        batch_norm_decay=0.9, learning_rate=1e-5, batch_sz=1024,
        adapt_lr=False, print_progress=True, show_fig=True):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # static features
    X = data['X_train_static_mins']
    N, D = X.shape
    self.X = tf.placeholder(tf.float32, shape=(None, D), name='X')

    # timeseries features
    X_time = data['X_train_time_0']
    T1, N1, D1 = X_time.shape
    assert N == N1
    self.X_time = tf.placeholder(tf.float32, shape=(T1, None, D1), name='X_time')

    self.train = tf.placeholder(tf.bool, shape=(), name='train')
    self.rnn_keep_p_encode = tf.placeholder(tf.float32, shape=(), name='rnn_keep_p_encode')
    self.rnn_keep_p_decode = tf.placeholder(tf.float32, shape=(), name='rnn_keep_p_decode')
    adp_learning_rate = tf.placeholder(tf.float32, shape=(), name='adp_learning_rate')

    he_init = variance_scaling_initializer()
    bn_params = {
        'is_training': self.train,
        'decay': batch_norm_decay,
        'updates_collections': None
    }

    latent_size = self.encoder_layer_sizes[-1]

    inputs = self.X
    with tf.variable_scope('static_encoder'):
        for layer_size, keep_p in zip(self.encoder_layer_sizes[:-1], self.encoder_dropout[:-1]):
            inputs = dropout(inputs, keep_p, is_training=self.train)
            inputs = fully_connected(inputs, layer_size,
                                     weights_initializer=he_init,
                                     activation_fn=activation,
                                     normalizer_fn=batch_norm,
                                     normalizer_params=bn_params)

    if self.rnn_encoder_layer_sizes:
        with tf.variable_scope('rnn_encoder'):
            rnn_cell = MultiRNNCell([
                LayerNormBasicLSTMCell(s, activation=tf.tanh,
                                       dropout_keep_prob=self.rnn_encoder_dropout)
                for s in self.rnn_encoder_layer_sizes
            ])
            time_inputs, states = tf.nn.dynamic_rnn(rnn_cell, self.X_time,
                                                    swap_memory=True,
                                                    time_major=True,
                                                    dtype=tf.float32)
            time_inputs = tf.transpose(time_inputs, perm=(1, 0, 2))
            time_inputs = tf.reshape(time_inputs,
                                     shape=(-1, self.rnn_encoder_layer_sizes[-1] * T1))
            inputs = tf.concat([inputs, time_inputs], axis=1)

    with tf.variable_scope('latent_space'):
        inputs = dropout(inputs, self.encoder_dropout[-1], is_training=self.train)
        loc = fully_connected(inputs, latent_size,
                              weights_initializer=he_init,
                              activation_fn=None,
                              normalizer_fn=batch_norm,
                              normalizer_params=bn_params)
        scale = fully_connected(inputs, latent_size,
                                weights_initializer=he_init,
                                activation_fn=tf.nn.softplus,
                                normalizer_fn=batch_norm,
                                normalizer_params=bn_params)
        standard_normal = Normal(loc=np.zeros(latent_size, dtype=np.float32),
                                 scale=np.ones(latent_size, dtype=np.float32))
        e = standard_normal.sample(tf.shape(loc)[0])
        outputs = e * scale + loc

    static_output_size = self.decoder_layer_sizes[0]
    if self.rnn_decoder_layer_sizes:
        time_output_size = self.rnn_decoder_layer_sizes[0] * T1
        output_size = static_output_size + time_output_size
    else:
        output_size = static_output_size

    outputs = fully_connected(outputs, output_size,
                              weights_initializer=he_init,
                              activation_fn=activation,
                              normalizer_fn=batch_norm,
                              normalizer_params=bn_params)

    if self.rnn_decoder_layer_sizes:
        outputs, time_outputs = tf.split(outputs, [static_output_size, time_output_size], axis=1)

    with tf.variable_scope('static_decoder'):
        for layer_size, keep_p in zip(self.decoder_layer_sizes, self.decoder_dropout[:-1]):
            outputs = dropout(outputs, keep_p, is_training=self.train)
            outputs = fully_connected(outputs, layer_size,
                                      weights_initializer=he_init,
                                      activation_fn=activation,
                                      normalizer_fn=batch_norm,
                                      normalizer_params=bn_params)
        outputs = dropout(outputs, self.decoder_dropout[-1], is_training=self.train)
        outputs = fully_connected(outputs, D,
                                  weights_initializer=he_init,
                                  activation_fn=None,
                                  normalizer_fn=batch_norm,
                                  normalizer_params=bn_params)
        X_hat = Bernoulli(logits=outputs)
        self.posterior_predictive = X_hat.sample()
        self.posterior_predictive_probs = tf.nn.sigmoid(outputs)

    if self.rnn_decoder_layer_sizes:
        with tf.variable_scope('rnn_decoder'):
            self.rnn_decoder_layer_sizes.append(D1)
            time_output_size = self.rnn_decoder_layer_sizes[0]
            time_outputs = tf.reshape(time_outputs, shape=(-1, T1, time_output_size))
            time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
            rnn_cell = MultiRNNCell([
                LayerNormBasicLSTMCell(s, activation=tf.tanh,
                                       dropout_keep_prob=self.rnn_decoder_dropout)
                for s in self.rnn_decoder_layer_sizes
            ])
            time_outputs, states = tf.nn.dynamic_rnn(rnn_cell, time_outputs,
                                                     swap_memory=True,
                                                     time_major=True,
                                                     dtype=tf.float32)
            time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
            time_outputs = tf.reshape(time_outputs, shape=(-1, T1 * D1))
            X_hat_time = Bernoulli(logits=time_outputs)
            posterior_predictive_time = X_hat_time.sample()
            posterior_predictive_time = tf.reshape(posterior_predictive_time, shape=(-1, T1, D1))
            self.posterior_predictive_time = tf.transpose(posterior_predictive_time, perm=(1, 0, 2))
            self.posterior_predictive_probs_time = tf.nn.sigmoid(time_outputs)

    kl_div = -tf.log(scale) + 0.5 * (scale**2 + loc**2) - 0.5
    kl_div = tf.reduce_sum(kl_div, axis=1)
    expected_log_likelihood = tf.reduce_sum(X_hat.log_prob(self.X), axis=1)

    X_time_trans = tf.transpose(self.X_time, perm=(1, 0, 2))
    X_time_reshape = tf.reshape(X_time_trans, shape=(-1, T1 * D1))

    if self.rnn_encoder_layer_sizes:
        expected_log_likelihood_time = tf.reduce_sum(X_hat_time.log_prob(X_time_reshape), axis=1)
        elbo = -tf.reduce_sum(expected_log_likelihood + expected_log_likelihood_time - kl_div)
    else:
        elbo = -tf.reduce_sum(expected_log_likelihood - kl_div)

    train_op = tf.train.AdamOptimizer(learning_rate=adp_learning_rate).minimize(elbo)

    tf.summary.scalar('elbo', elbo)

    if self.save_file:
        saver = tf.train.Saver()

    if self.tensorboard:
        for v in tf.trainable_variables():
            tf.summary.histogram(v.name, v)
        train_merge = tf.summary.merge_all()
        writer = tf.summary.FileWriter(self.tensorboard)

    self.init_op = tf.global_variables_initializer()

    n = 0
    n_batches = N // batch_sz
    costs = list()
    min_cost = np.inf
    t0 = dt.now()

    with tf.Session() as sess:
        sess.run(self.init_op)
        for epoch in range(epochs):
            idxs = shuffle(range(N))
            X_train = X[idxs]
            X_train_time = X_time[:, idxs]
            for batch in range(n_batches):
                n += 1
                X_batch = X_train[batch * batch_sz:(batch + 1) * batch_sz]
                X_batch_time = X_train_time[:, batch * batch_sz:(batch + 1) * batch_sz]
                sess.run(train_op,
                         feed_dict={
                             self.X: X_batch,
                             self.X_time: X_batch_time,
                             self.rnn_keep_p_encode: self.rnn_encoder_dropout,
                             self.rnn_keep_p_decode: self.rnn_decoder_dropout,
                             self.train: True,
                             adp_learning_rate: learning_rate
                         })
                if n % 100 == 0 and print_progress:
                    cost = sess.run(elbo,
                                    feed_dict={
                                        self.X: X,
                                        self.X_time: X_time,
                                        self.rnn_keep_p_encode: 1.0,
                                        self.rnn_keep_p_decode: 1.0,
                                        self.train: False
                                    })
                    cost /= N
                    costs.append(cost)
                    if adapt_lr and epoch > 0:
                        if cost < min_cost:
                            min_cost = cost
                        elif cost > min_cost * 1.01:
                            learning_rate *= 0.75
                            if print_progress:
                                print('Updating Learning Rate', learning_rate)
                    print('Epoch:', epoch, 'Batch:', batch, 'Cost:', cost)
                    if self.tensorboard:
                        train_sum = sess.run(train_merge,
                                             feed_dict={
                                                 self.X: X,
                                                 self.X_time: X_time,
                                                 self.rnn_keep_p_encode: 1.0,
                                                 self.rnn_keep_p_decode: 1.0,
                                                 self.train: False
                                             })
                        writer.add_summary(train_sum, n)
            seconds = (dt.now() - t0).seconds
            if seconds > max_seconds:
                if print_progress:
                    print('Breaking after', seconds, 'seconds')
                break
        if self.save_file:
            saver.save(sess, self.save_file)
        if self.tensorboard:
            writer.add_graph(sess.graph)

    if show_fig:
        plt.plot(costs)
        plt.title('Costs and Scores')
        plt.show()
def step(self, time, inputs, input_latent_sample, states, use_inference, name=None):
    """Perform a decoding step.

    Args:
        time: scalar `int32`.
        inputs: A (structure of) input tensors.
        input_latent_sample: Can override sampling of new latent.
        states: A (structure of) state tensors and TensorArrays.
        use_inference: If True, overrides checks for inference or prior network
            usage and always uses the inference network.
        name: Name scope for any created operations.
    Returns:
        `output_frame, inference_dist, prior_dist, states, z_sample`.
    """
    cell_outputs, cell_states = dict(), dict()
    if self._prev_inputs is None:
        raise ValueError("Need previous input for VariationalDecoder!")
    with ops.name_scope(name, "VariationalDecoderStep", (time, inputs, states)):
        if input_latent_sample is None:
            # predict inference distribution from current frame if any
            if inputs is not None:
                cell_outputs['inference'], cell_states['inference'] = \
                    self._cells['inference'](self._maybe_encode_inputs(inputs), states['inference'])
            else:
                cell_outputs['inference'], cell_states['inference'] = None, None

            # predict learned prior from previous frame
            if not self._fixed_prior:
                cell_outputs['prior'], cell_states['prior'] = \
                    self._cells['prior'](self._maybe_encode_inputs(self._prev_inputs), states['prior'])
            else:
                means = tf.zeros([self._batch_size, self._sample_dim])
                log_std_dev = tf.log(tf.constant(1.0, shape=[self._batch_size, self._sample_dim]))
                cell_outputs['prior'] = tf.concat([means, log_std_dev], axis=1)

            # sample from inference or prior distribution
            if use_inference:
                means = cell_outputs['inference'][..., :self._sample_dim]
                std_dev = tf.exp(cell_outputs['inference'][..., self._sample_dim:])
            else:
                means = cell_outputs['prior'][..., :self._sample_dim]
                std_dev = tf.exp(cell_outputs['prior'][..., self._sample_dim:])
            z_dists = Normal(loc=means, scale=std_dev)
            z_sample = tf.squeeze(z_dists.sample([1]))  # sample one sample from each distribution
            if tf.flags.FLAGS.trajectory_space and not tf.flags.FLAGS.trajectory_autoencoding:
                z_sample = tf.concat([
                    z_sample,
                    tf.zeros(z_sample.get_shape().as_list()[:-1] + [1], dtype=tf.float32)
                ], axis=-1)
        else:
            z_sample = input_latent_sample
            cell_outputs['inference'] = None
            cell_outputs['prior'] = None

        # reconstruct output with LSTM and decoder
        if self._use_cdna_model:
            decoder_input = [self._prev_inputs, self._first_image, z_sample, self._is_training]
        else:
            decoder_input = tf.concat((self._prev_inputs, z_sample), axis=-1)
        cell_outputs['output'], cell_states['output'] = \
            self._cells['output'](decoder_input, states['output'])
        if self._output_layer is not None:
            cell_outputs['output'] = self._output_layer(cell_outputs['output'])

    return cell_outputs['output'], cell_outputs['inference'], \
        cell_outputs['prior'], cell_states, z_sample
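# Sketch of the latent KL term such a variational decoder is typically trained
# with, using the inference/prior outputs returned by step(). The assumed layout
# is [means, log_std_devs] along the last axis, as in make_dists_and_sample above;
# tf.contrib.distributions.kl_divergence is the TF >= 1.4 name (older releases
# expose it as kl).
import tensorflow as tf
from tensorflow.contrib.distributions import Normal


def latent_kl(inference_out, prior_out, sample_dim):
    q = Normal(loc=inference_out[..., :sample_dim],
               scale=tf.exp(inference_out[..., sample_dim:]))
    p = Normal(loc=prior_out[..., :sample_dim],
               scale=tf.exp(prior_out[..., sample_dim:]))
    return tf.reduce_sum(tf.contrib.distributions.kl_divergence(q, p), axis=-1)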