def gauss_sample(gauss_params, quant_chann, use_log_scales=True):
    mean, std = mean_std_from_out_params(gauss_params, use_log_scales)
    distribution = Normal(loc=mean, scale=std)
    x = distribution.sample()
    x = tf.clip_by_value(x, -1., 1. - 2. / quant_chann)
    x_quantized = utils.cast_quantize(x, quant_chann)
    return x_quantized
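# The snippet above relies on two project-specific helpers that are not shown.
# The stand-ins below are only a sketch of what they plausibly do (the real
# mean_std_from_out_params and utils.cast_quantize live elsewhere in that
# repository); the assumed layout is that the last axis of gauss_params holds
# [mean, (log_)scale].
import tensorflow as tf


def mean_std_from_out_params(gauss_params, use_log_scales=True):
    # Split the parameter tensor into mean and scale halves (assumed layout).
    mean, scale = tf.split(gauss_params, 2, axis=-1)
    std = tf.exp(scale) if use_log_scales else scale
    return mean, std


def cast_quantize(x, quant_chann):
    # Map x in [-1, 1 - 2/quant_chann] onto integer bins 0 .. quant_chann - 1.
    return tf.cast(tf.floor((x + 1.) / 2. * quant_chann), tf.int32)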
def make_dists_and_sample(latent_sample_seq):
    # latent_sample_seq consists of means and log_stds
    latent_dim = int(latent_sample_seq.get_shape().as_list()[-1] / 2)
    latent_dists = Normal(loc=latent_sample_seq[..., :latent_dim],
                          scale=tf.exp(latent_sample_seq[..., latent_dim:]))
    latent_sample_seq = tf.squeeze(latent_dists.sample([1]))  # sample one sample from each distribution
    return latent_dists, latent_sample_seq
def _sample(self, mu, std_dev):
    """Sample from parametrized Gaussian distribution.

    :param mu: Gaussian mean.
    :param std_dev: Standard deviation of the Gaussian.
    :return: Sample z.
    """
    z_dists = Normal(loc=mu, scale=std_dev)
    z = tf.squeeze(z_dists.sample([1]))  # sample one sample from each distribution
    return z
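# For reference, the same draw written with the reparameterisation trick, which
# keeps the sample differentiable with respect to mu and std_dev (names follow
# _sample above). This is a minimal sketch, not part of the original project.
import tensorflow as tf


def sample_reparameterized(mu, std_dev):
    eps = tf.random_normal(tf.shape(mu))  # eps ~ N(0, I)
    return mu + std_dev * eps             # z ~ N(mu, std_dev^2)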
def main_pendulum(logdir, seed, n_iter, gamma, min_timesteps_per_batch,
                  initial_stepsize, desired_kl, vf_type, vf_params, animate=False):
    tf.set_random_seed(seed)
    np.random.seed(seed)
    env = gym.make("Pendulum-v0")
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.shape[0]
    logz.configure_output_dir(logdir)
    if vf_type == 'linear':
        vf = LinearValueFunction(**vf_params)
    elif vf_type == 'nn':
        vf = NnValueFunction(ob_dim=ob_dim, **vf_params)

    ####
    # YOUR_CODE_HERE
    # batch of observations
    sy_ob_no = tf.placeholder(shape=[None, ob_dim], name="ob", dtype=tf.float32)
    # batch of actions
    sy_ac_n = tf.placeholder(shape=[None], name="ac", dtype=tf.float32)
    # batch of advantage function estimates
    sy_adv_n = tf.placeholder(shape=[None], name="adv", dtype=tf.float32)
    # 2-layer network to learn state from observation
    sy_h1 = lrelu(dense(sy_ob_no, 32, "h1", weight_init=normc_initializer(1.0)))
    sy_h2 = lrelu(dense(sy_h1, 32, "h2", weight_init=normc_initializer(1.0)))
    # Mean control output
    sy_mean_na = dense(sy_h2, ac_dim, "mean", weight_init=normc_initializer(0.1))
    # Variance
    logstd_a = tf.get_variable("logstdev", [ac_dim])
    # define action distribution
    sy_ac_distr = Normal(mu=tf.squeeze(sy_mean_na),
                         sigma=tf.exp(logstd_a),
                         validate_args=True)
    # sampled actions, used for defining the policy
    # (NOT computing the policy gradient)
    sy_sampled_ac = tf.squeeze(sy_ac_distr.sample(sample_shape=[ac_dim]))
    sy_n = tf.shape(sy_ob_no)[0]
    sy_logprob_n = sy_ac_distr.log_pdf(sy_ac_n)
    # used for computing KL and entropy, JUST FOR DIAGNOSTIC PURPOSES
    sy_oldmean_na = tf.placeholder(shape=[None, ac_dim], name='oldmean', dtype=tf.float32)
    sy_oldlogstd_a = tf.placeholder(shape=[ac_dim], name="oldlogstdev", dtype=tf.float32)
    sy_ac_olddistr = Normal(mu=tf.squeeze(sy_oldmean_na),
                            sigma=tf.exp(sy_oldlogstd_a),
                            validate_args=True)
    sy_kl = tf.reduce_mean(tf.contrib.distributions.kl(sy_ac_distr, sy_ac_olddistr))
    sy_ent = tf.reduce_mean(sy_ac_distr.entropy())
    ####

    # Loss function that we'll differentiate to get the policy gradient
    # ("surr" is for "surrogate loss")
    sy_surr = -tf.reduce_mean(sy_adv_n * sy_logprob_n)
    # Symbolic, in case you want to change the stepsize during optimization.
    # (We're not doing that currently)
    sy_stepsize = tf.placeholder(shape=[], dtype=tf.float32)
    update_op = tf.train.AdamOptimizer(sy_stepsize).minimize(sy_surr)

    sess = tf.Session()
    sess.__enter__()  # equivalent to `with sess:`
    tf.global_variables_initializer().run()  # pylint: disable=E1101

    total_timesteps = 0
    stepsize = initial_stepsize

    for i in range(n_iter):
        print("********** Iteration %i ************" % i)

        ####
        # YOUR_CODE_HERE
        # Collect paths until we have enough timesteps
        timesteps_this_batch = 0
        paths = []
        while True:
            ob = env.reset()
            terminated = False
            obs, acs, rewards = [], [], []
            animate_this_episode = (len(paths) == 0 and (i % 10 == 0) and animate)
            while True:
                if animate_this_episode:
                    env.render()
                obs.append(ob)
                ac = sess.run(sy_sampled_ac, feed_dict={sy_ob_no: ob[None]})
                acs.append(ac)
                ob, rew, done, _ = env.step([ac])
                rewards.append(rew)
                if done:
                    break
            path = {
                "observation": np.array(obs),
                "terminated": terminated,
                "reward": np.array(rewards),
                "action": np.array(acs)
            }
            paths.append(path)
            timesteps_this_batch += pathlength(path)
            if timesteps_this_batch > min_timesteps_per_batch:
                break
        total_timesteps += timesteps_this_batch

        # Estimate advantage function
        vtargs, vpreds, advs = [], [], []
        for path in paths:
            rew_t = path["reward"]
            return_t = discount(rew_t, gamma)
            vpred_t = vf.predict(path["observation"])
            adv_t = return_t - vpred_t
            advs.append(adv_t)
            vtargs.append(return_t)
            vpreds.append(vpred_t)

        # Build arrays for policy update
        ob_no = np.concatenate([path["observation"] for path in paths])
        ac_n = np.concatenate([path["action"] for path in paths])
        adv_n = np.concatenate(advs)
        standardized_adv_n = (adv_n - adv_n.mean()) / (adv_n.std() + 1e-8)
        vtarg_n = np.concatenate(vtargs)
        vpred_n = np.concatenate(vpreds)
        vf.fit(ob_no, vtarg_n)

        # Policy update
        _, oldmean_na, oldlogstdev = sess.run(
            [update_op, sy_mean_na, logstd_a],
            feed_dict={
                sy_ob_no: ob_no,
                sy_ac_n: ac_n,
                sy_adv_n: standardized_adv_n,
                sy_stepsize: stepsize
            })
        kl, ent = sess.run(
            [sy_kl, sy_ent],
            feed_dict={
                sy_ob_no: ob_no,
                sy_oldmean_na: oldmean_na,
                sy_oldlogstd_a: oldlogstdev
            })
        ####

        if kl > desired_kl * 2:
            stepsize /= 1.5
            print('stepsize -> %s' % stepsize)
        elif kl < desired_kl / 2:
            stepsize *= 1.5
            print('stepsize -> %s' % stepsize)
        else:
            print('stepsize OK')

        # Log diagnostics
        logz.log_tabular("EpRewMean", np.mean([path["reward"].sum() for path in paths]))
        logz.log_tabular("EpLenMean", np.mean([pathlength(path) for path in paths]))
        logz.log_tabular("KLOldNew", kl)
        logz.log_tabular("Entropy", ent)
        logz.log_tabular("EVBefore", explained_variance_1d(vpred_n, vtarg_n))
        logz.log_tabular("EVAfter", explained_variance_1d(vf.predict(ob_no), vtarg_n))
        logz.log_tabular("TimestepsSoFar", total_timesteps)
        # If you're overfitting, EVAfter will be way larger than EVBefore.
        # Note that we fit the value function AFTER using it to compute the
        # advantage function, to avoid introducing bias.
        logz.dump_tabular()
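# The loop above calls a discount() helper that is not shown. A common
# implementation (assumed here; the original defines it elsewhere in the
# homework code) computes discounted cumulative returns with a linear filter:
import scipy.signal


def discount(x, gamma):
    # y[t] = x[t] + gamma * x[t+1] + gamma^2 * x[t+2] + ...
    return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]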
with tf.name_scope("cost"):
    # mean_squared_error
    RSEcost = tf.reduce_mean(tf.square(y - y_mu))  # use square error for cost function

    # # negative log-likelihood (same as maximum-likelihood)
    # y_sigma = tf.sqrt(tfmixedmodel(Xtf, tf.square(std_encoder1), Ztf, tf.square(std_encoder2)))
    # NLLcost = - tf.reduce_sum(-0.5 * tf.log(2. * np.pi) - tf.log(y_sigma)
    #                           - 0.5 * tf.square((y - y_mu) / y_sigma))

    # Mean-field variational inference using the ELBO
    p_log_prob = [0.0] * n_samples
    q_log_prob = [0.0] * n_samples
    for s in range(n_samples):
        beta_tf_copy = Normal(loc=beta_mu, scale=std_encoder1)
        beta_sample = beta_tf_copy.sample()
        q_log_prob[s] += tf.reduce_sum(beta_tf.log_prob(beta_sample))
        b_tf_copy = Normal(loc=b_mu, scale=std_encoder2)
        b_sample = b_tf_copy.sample()
        q_log_prob[s] += tf.reduce_sum(b_tf.log_prob(b_sample))
        priormodel = Normal(loc=priormu, scale=priorsigma)
        y_sample = tf.matmul(Xtf, beta_sample) + tf.matmul(Ztf, b_sample)
        p_log_prob[s] += tf.reduce_sum(priormodel.log_prob(beta_sample))
        p_log_prob[s] += tf.reduce_sum(priormodel.log_prob(b_sample))
        modelcopy = Normal(loc=y_sample, scale=priorliksigma)
        p_log_prob[s] += tf.reduce_sum(modelcopy.log_prob(y))
    p_log_prob = tf.stack(p_log_prob)
    q_log_prob = tf.stack(q_log_prob)
    ELBO = -tf.reduce_mean(p_log_prob - q_log_prob)  # negative ELBO, minimised as a loss
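# Minimal standalone sketch of the same Monte Carlo ELBO estimate for a single
# Gaussian latent variable. The mixed-model pieces above (Xtf, Ztf, the encoder
# std tensors and priors) are assumed to be defined elsewhere in that script;
# here q_mu, q_sigma and y are plain tensors of matching shape.
import tensorflow as tf
from tensorflow.contrib.distributions import Normal


def mc_elbo(q_mu, q_sigma, y, lik_sigma=1.0, n_samples=5):
    prior = Normal(loc=0., scale=1.)
    q = Normal(loc=q_mu, scale=q_sigma)
    elbo_terms = []
    for _ in range(n_samples):
        z = q.sample()
        log_q = tf.reduce_sum(q.log_prob(z))       # log q(z)
        log_p = tf.reduce_sum(prior.log_prob(z))   # log p(z)
        log_lik = tf.reduce_sum(Normal(loc=z, scale=lik_sigma).log_prob(y))  # log p(y|z)
        elbo_terms.append(log_p + log_lik - log_q)
    return tf.reduce_mean(tf.stack(elbo_terms))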
def _build_ad_nn(self, tensor_io):
    from drlutils.dataflow.tensor_io import TensorIO
    assert isinstance(tensor_io, TensorIO)
    from drlutils.model.base import get_current_nn_context
    from tensorpack.tfutils.common import get_global_step_var
    global_step = get_global_step_var()
    nnc = get_current_nn_context()
    is_training = nnc.is_training
    i_state = tensor_io.getInputTensor('state')
    i_agentIdent = tensor_io.getInputTensor('agentIdent')
    i_sequenceLength = tensor_io.getInputTensor('sequenceLength')
    i_resetRNN = tensor_io.getInputTensor('resetRNN')
    l = i_state
    # l = tf.Print(l, [i_state, tf.shape(i_state)], 'State = ')
    # l = tf.Print(l, [i_agentIdent, tf.shape(i_agentIdent)], 'agentIdent = ')
    # l = tf.Print(l, [i_sequenceLength, tf.shape(i_sequenceLength)], 'SeqLen = ')
    # l = tf.Print(l, [i_resetRNN, tf.shape(i_resetRNN)], 'resetRNN = ')

    with tf.variable_scope('critic', reuse=nnc.reuse) as vs:

        def _get_cell():
            cell = tf.nn.rnn_cell.BasicLSTMCell(256)
            # if is_training:
            #     cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.9)
            return cell

        cell = tf.nn.rnn_cell.MultiRNNCell([_get_cell() for _ in range(1)])
        rnn_outputs = self._buildRNN(
            l,
            cell,
            tensor_io.batchSize,
            i_agentIdent=i_agentIdent,
            i_sequenceLength=i_sequenceLength,
            i_resetRNN=i_resetRNN,
        )
        rnn_outputs = tf.reshape(rnn_outputs, [-1, rnn_outputs.get_shape().as_list()[-1]])
        l = rnn_outputs
        from ad_cur.autodrive.model.selu import fc_selu
        for lidx in range(2):
            l = fc_selu(
                l,
                200,
                keep_prob=1.,  # we train on sensor input only, so key information must not be dropped
                is_training=is_training,
                name='fc-{}'.format(lidx))
        value = tf.layers.dense(l, 1, name='fc-value')
        value = tf.squeeze(value, [1], name="value")
        if not hasattr(self, '_weights_critic'):
            self._weights_critic = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    with tf.variable_scope('actor', reuse=nnc.reuse) as vs:
        l = tf.stop_gradient(l)
        l = tf.layers.dense(l, 128, activation=tf.nn.relu6, name='fc-actor')
        mu_steering = 0.5 * tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-steering')
        mu_accel = tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-accel')
        mus = tf.concat([mu_steering, mu_accel], axis=-1)
        # mus = tf.layers.dense(l, 2, activation=tf.nn.tanh, name='fc-mus')
        # sigmas = tf.layers.dense(l, 2, activation=tf.nn.softplus, name='fc-sigmas')
        # sigmas = tf.clip_by_value(sigmas, -0.001, 0.5)

        def saturating_sigmoid(x):
            """Saturating sigmoid: 1.2 * sigmoid(x) - 0.1 cut to [0, 1]."""
            with tf.name_scope("saturating_sigmoid", [x]):
                y = tf.sigmoid(x)
                return tf.minimum(1.0, tf.maximum(0.0, 1.2 * y - 0.1))

        sigma_steering_ = 0.1 * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering')
        sigma_accel_ = 0.25 * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel')

        if not nnc.is_evaluating:
            sigma_beta_steering = tf.get_default_graph().get_tensor_by_name('actor/sigma_beta_steering:0')
            sigma_beta_accel = tf.get_default_graph().get_tensor_by_name('actor/sigma_beta_accel:0')
            sigma_beta_steering = tf.constant(1e-4)  # note: this overrides the tensor fetched above
            # sigma_beta_steering_exp = tf.train.exponential_decay(0.3, global_step, 1000, 0.5, name='sigma/beta/steering/exp')
            # sigma_beta_accel_exp = tf.train.exponential_decay(0.5, global_step, 5000, 0.5, name='sigma/beta/accel/exp')
        else:
            sigma_beta_steering = tf.constant(1e-4)
            sigma_beta_accel = tf.constant(1e-4)
        sigma_steering = (sigma_steering_ + sigma_beta_steering)
        sigma_accel = (sigma_accel_ + sigma_beta_accel)
        sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)
        # if is_training:
        #     pass
        # # Without sigma_beta, convergence is very slow and unstable; likely reasons:
        # # 1. Exploring as widely as possible early in training keeps the network out of local optima.
        # # 2. A sigma that is too small early on makes normal_dist's log_prob too large, so gradient
        # #    updates blow up, and the network is crippled from the start and hard to recover.
        # # if is_training:
        # #     sigmas += sigma_beta_steering
        # sigma_steering = tf.clip_by_value(sigma_steering, sigma_beta_steering, 0.5)
        # sigma_accel = tf.clip_by_value(sigma_accel, sigma_beta_accel, 0.5)
        # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
        # sigmas_orig = sigmas
        # sigmas = sigmas + sigma_beta_steering
        # sigmas = tf.minimum(sigmas + 0.1, 100)
        # sigmas = tf.clip_by_value(sigmas, sigma_beta_steering, 1)
        # sigma_steering += sigma_beta_steering
        # sigma_accel += sigma_beta_accel
        # mus = tf.concat([mu_steering, mu_accel], axis=-1)

        from tensorflow.contrib.distributions import Normal
        dists = Normal(mus, sigmas + 0.01)
        policy = tf.squeeze(dists.sample([1]), [0])
        # clip to within two standard deviations
        policy = tf.clip_by_value(policy, mus - 2 * sigmas, mus + 2 * sigmas)
        if is_training:
            self._addMovingSummary(
                tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                # sigma_beta_accel,
                # sigma_beta_steering,
            )
        # actions = tf.Print(actions, [mus, sigmas, tf.concat([sigma_steering_, sigma_accel_], -1), actions],
        #                    'mu/sigma/sigma.orig/act=', summarize=4)
        if not hasattr(self, '_weights_actor'):
            self._weights_actor = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    if not is_training:
        tensor_io.setOutputTensors(policy, value, mus, sigmas)
        return

    i_actions = tensor_io.getInputTensor("action")
    # i_actions = tf.Print(i_actions, [i_actions], 'actions = ')
    i_actions = tf.reshape(i_actions, [-1] + i_actions.get_shape().as_list()[2:])
    log_probs = dists.log_prob(i_actions)
    # exp_v = tf.transpose(
    #     tf.multiply(tf.transpose(log_probs), advantage))
    # exp_v = tf.multiply(log_probs, advantage)
    i_advantage = tensor_io.getInputTensor("advantage")
    i_advantage = tf.reshape(i_advantage, [-1] + i_advantage.get_shape().as_list()[2:])
    exp_v = log_probs * tf.expand_dims(i_advantage, -1)
    entropy = dists.entropy()
    entropy_beta = tf.get_variable(
        'entropy_beta', shape=[],
        initializer=tf.constant_initializer(0.01), trainable=False)
    exp_v = entropy_beta * entropy + exp_v
    loss_policy = tf.reduce_mean(-tf.reduce_sum(exp_v, axis=-1), name='loss/policy')

    i_futurereward = tensor_io.getInputTensor("futurereward")
    i_futurereward = tf.reshape(i_futurereward, [-1] + i_futurereward.get_shape().as_list()[2:])
    loss_value = tf.reduce_mean(0.5 * tf.square(value - i_futurereward))
    loss_entropy = tf.reduce_mean(tf.reduce_sum(entropy, axis=-1), name='xentropy_loss')

    from tensorflow.contrib.layers.python.layers.regularizers import apply_regularization, l2_regularizer
    loss_l2_regularizer = apply_regularization(l2_regularizer(1e-4), self._weights_critic)
    loss_l2_regularizer = tf.identity(loss_l2_regularizer, 'loss/l2reg')
    loss_value += loss_l2_regularizer
    loss_value = tf.identity(loss_value, name='loss/value')
    # self.cost = tf.add_n([loss_policy, loss_value * 0.1, loss_l2_regularizer])

    self._addParamSummary([('.*', ['rms', 'absmax'])])
    pred_reward = tf.reduce_mean(value, name='predict_reward')
    import tensorpack.tfutils.symbolic_functions as symbf
    advantage = symbf.rms(i_advantage, name='rms_advantage')
    self._addMovingSummary(
        loss_policy,
        loss_value,
        loss_entropy,
        pred_reward,
        advantage,
        loss_l2_regularizer,
        tf.reduce_mean(policy[:, 0], name='actor/steering/mean'),
        tf.reduce_mean(policy[:, 1], name='actor/accel/mean'),
    )
    return loss_policy, loss_value
tf.set_random_seed(1)


def F(x):
    return x**2 - 2 * x + 1


def get_fitness(value):
    return -value


mean = tf.Variable(tf.constant(-30.), dtype=tf.float32)
sigma = tf.Variable(tf.constant(1.), dtype=tf.float32)
N_dist = Normal(loc=mean, scale=sigma)
make_kids = N_dist.sample([POP_SIZE])

tfkids = tf.placeholder(tf.float32, [POP_SIZE, DNA_SIZE])
tfkids_fit = tf.placeholder(tf.float32, [POP_SIZE])
loss = -tf.reduce_mean(N_dist.log_prob(tfkids) * tfkids_fit)
train_op = tf.train.GradientDescentOptimizer(LR).minimize(loss)

x = np.linspace(-70, 70, 100)
plt.plot(x, F(x))
plt.xlim(-70, 70)
plt.ylim(-100, 1000)

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
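# The snippet above only builds the graph and the plot; a training loop along
# these lines would complete the natural-evolution-strategy example. N_GENERATIONS
# is an assumed constant (POP_SIZE, DNA_SIZE and LR are assumed module-level
# constants), and the reshape assumes DNA_SIZE == 1 so the scalar samples can be
# fed into the [POP_SIZE, DNA_SIZE] placeholder.
N_GENERATIONS = 100
for _ in range(N_GENERATIONS):
    kids = sess.run(make_kids).reshape(POP_SIZE, DNA_SIZE)  # sample a population
    kids_fit = get_fitness(F(kids)).ravel()                 # higher fitness is better
    sess.run(train_op, feed_dict={tfkids: kids, tfkids_fit: kids_fit})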
def _get_NN_prediction(self, state):
    from tensorpack.tfutils import symbolic_functions
    ctx = get_current_tower_context()
    is_training = ctx.is_training
    l = state
    # l = tf.Print(l, [state], 'State = ')
    with tf.variable_scope('critic') as vs:
        from autodrive.model.selu import fc_selu
        for lidx in range(8):
            l = fc_selu(l, 200,
                        keep_prob=1.,  # we train on sensor input only, so key information must not be dropped
                        is_training=is_training,
                        name='fc-{}'.format(lidx))
        # l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc-dense')
        # for lidx, hidden_size in enumerate([300, 600]):
        #     l = tf.layers.dense(l, hidden_size, activation=tf.nn.relu, name='fc-%d' % lidx)
        value = tf.layers.dense(l, 1, name='fc-value',
                                kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))
        if not hasattr(self, '_weights_critic'):
            self._weights_critic = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    with tf.variable_scope('actor') as vs:
        l = tf.stop_gradient(l)
        mu_steering = 0.5 * tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-steering',
                                            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        mu_accel = tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-accel',
                                   kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        mus = tf.concat([mu_steering, mu_accel], axis=-1)
        # mus = tf.layers.dense(l, 2, activation=tf.nn.tanh, name='fc-mus')
        # sigmas = tf.layers.dense(l, 2, activation=tf.nn.softplus, name='fc-sigmas')
        # sigmas = tf.clip_by_value(sigmas, -0.001, 0.5)
        sigma_steering_ = 0.5 * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering',
                                                kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        sigma_accel_ = 1. * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel',
                                            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        # sigma_beta_steering = symbolic_functions.get_scalar_var('sigma_beta_steering', 0.3, summary=True, trainable=False)
        # sigma_beta_accel = symbolic_functions.get_scalar_var('sigma_beta_accel', 0.3, summary=True, trainable=False)
        from tensorpack.tfutils.common import get_global_step_var
        sigma_beta_steering_exp = tf.train.exponential_decay(
            0.001, get_global_step_var(), 1000, 0.5, name='sigma/beta/steering/exp')
        sigma_beta_accel_exp = tf.train.exponential_decay(
            0.5, get_global_step_var(), 5000, 0.5, name='sigma/beta/accel/exp')
        # sigma_steering = tf.minimum(sigma_steering_ + sigma_beta_steering, 0.5)
        # sigma_accel = tf.minimum(sigma_accel_ + sigma_beta_accel, 0.2)
        # sigma_steering = sigma_steering_
        sigma_steering = (sigma_steering_ + sigma_beta_steering_exp)
        sigma_accel = (sigma_accel_ + sigma_beta_accel_exp)  # * 0.1
        # sigma_steering = sigma_steering_
        # sigma_accel = sigma_accel_
        sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)
        # sigma_steering = tf.clip_by_value(sigma_steering, 0.1, 0.5)
        # sigma_accel = tf.clip_by_value(sigma_accel, 0.1, 0.5)
        # sigmas = sigmas_orig + 0.001
        # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
        # sigma_beta = tf.get_variable('sigma_beta', shape=[], dtype=tf.float32,
        #                              initializer=tf.constant_initializer(.5), trainable=False)
        # if is_training:
        #     pass
        # # Without sigma_beta, convergence is very slow and unstable; likely reasons:
        # # 1. Exploring as widely as possible early in training keeps the network out of local optima.
        # # 2. A sigma that is too small early on makes normal_dist's log_prob too large, so gradient
        # #    updates blow up, and the network is crippled from the start and hard to recover.
        # # if is_training:
        # #     sigmas += sigma_beta_steering
        # sigma_steering = tf.clip_by_value(sigma_steering, sigma_beta_steering, 0.5)
        # sigma_accel = tf.clip_by_value(sigma_accel, sigma_beta_accel, 0.5)
        # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
        # sigmas_orig = sigmas
        # sigmas = sigmas + sigma_beta_steering
        # sigmas = tf.minimum(sigmas + 0.1, 100)
        # sigmas = tf.clip_by_value(sigmas, sigma_beta_steering, 1)
        # sigma_steering += sigma_beta_steering
        # sigma_accel += sigma_beta_accel
        # mus = tf.concat([mu_steering, mu_accel], axis=-1)
        from tensorflow.contrib.distributions import Normal
        dists = Normal(mus, sigmas + 1e-3)
        actions = tf.squeeze(dists.sample([1]), [0])
        # clip to within one standard deviation
        # actions = tf.clip_by_value(actions, -1., 1.)
        if is_training:
            summary.add_moving_summary(
                tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                sigma_beta_accel_exp,
                sigma_beta_steering_exp,
            )
        # actions = tf.Print(actions, [mus, sigmas, tf.concat([sigma_steering_, sigma_accel_], -1), actions],
        #                    'mu/sigma/sigma.orig/act=', summarize=4)
        if not hasattr(self, '_weights_actor'):
            self._weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
    return actions, value, dists
class VariationalAutoEncoder(object):
    def __init__(self, feature_size, latent_size, hidden_sizes,
                 reconstruction_distribution=None,
                 number_of_reconstruction_classes=None,
                 use_batch_norm=True, use_count_sum=True, epsilon=1e-6):

        # Setup
        super(VariationalAutoEncoder, self).__init__()

        self.feature_size = feature_size
        self.latent_size = latent_size
        self.hidden_sizes = hidden_sizes

        self.reconstruction_distribution_name = reconstruction_distribution
        self.reconstruction_distribution = distributions[reconstruction_distribution]
        self.k_max = number_of_reconstruction_classes

        self.use_batch_norm = use_batch_norm
        self.use_count_sum = use_count_sum
        self.epsilon = epsilon

        # self.graph = tf.Graph()
        self.x = tf.placeholder(tf.float32, [None, self.feature_size], 'x')  # counts
        if self.use_count_sum:
            self.n = tf.placeholder(tf.float32, [None, 1], 'N')  # total counts sum

        self.learning_rate = tf.placeholder(tf.float32, [], 'learning_rate')
        self.warm_up_weight = tf.placeholder(tf.float32, [], 'warm_up_weight')
        self.is_training = tf.placeholder(tf.bool, [], 'phase')

        self.inference()
        self.loss()
        self.training()

        self.summary = tf.summary.merge_all()

        for parameter in tf.trainable_variables():
            print(parameter.name, parameter.get_shape())

    @property
    def name(self):
        # model_name = dataSetBaseName(splitting_method, splitting_fraction,
        #                              filtering_method, feature_selection, feature_size)
        model_name = self.reconstruction_distribution_name.replace(" ", "_")

        # if self.k_max:
        #     model_name += "_c_" + str(self.k_max)

        if self.use_count_sum:
            model_name += "_sum"

        model_name += "_l_" + str(self.latent_size) + "_h_" + "_".join(map(str, self.hidden_sizes))

        if self.use_batch_norm:
            model_name += "_bn"

        # model_name += "_lr_{:.1g}".format(self.learning_rate)
        # model_name += "_b_" + str(self.batch_size)
        # model_name += "_wu_" + str(number_of_warm_up_epochs)
        # model_name += "_e_" + str(number_of_epochs)

        return model_name

    def inference(self):
        encoder = self.x

        with tf.variable_scope("ENCODER"):
            for i, hidden_size in enumerate(self.hidden_sizes):
                encoder = dense_layer(inputs=encoder,
                                      num_outputs=hidden_size,
                                      activation_fn=relu,
                                      use_batch_norm=self.use_batch_norm,
                                      is_training=self.is_training,
                                      scope='{:d}'.format(i + 1))

        with tf.variable_scope("Z"):
            z_mu = dense_layer(inputs=encoder,
                               num_outputs=self.latent_size,
                               activation_fn=None,
                               use_batch_norm=False,
                               is_training=self.is_training,
                               scope='MU')
            z_sigma = dense_layer(inputs=encoder,
                                  num_outputs=self.latent_size,
                                  activation_fn=lambda x: tf.exp(tf.clip_by_value(x, -3, 3)),
                                  use_batch_norm=False,
                                  is_training=self.is_training,
                                  scope='SIGMA')
            self.q_z_given_x = Normal(mu=z_mu, sigma=z_sigma)

        # Mean of z
        self.z_mean = self.q_z_given_x.mean()

        # Stochastic layer
        self.z = self.q_z_given_x.sample()

        # Decoder - Generative model, p(x|z)
        if self.use_count_sum:
            decoder = tf.concat([self.z, self.n], axis=1, name='Z_N')
        else:
            decoder = self.z

        with tf.variable_scope("DECODER"):
            for i, hidden_size in enumerate(reversed(self.hidden_sizes)):
                decoder = dense_layer(inputs=decoder,
                                      num_outputs=hidden_size,
                                      activation_fn=relu,
                                      use_batch_norm=self.use_batch_norm,
                                      is_training=self.is_training,
                                      scope='{:d}'.format(len(self.hidden_sizes) - i))

        # Reconstruction distribution parameterisation
        with tf.variable_scope("X_TILDE"):
            x_theta = {}
            for parameter in self.reconstruction_distribution["parameters"]:
                parameter_activation_function = \
                    self.reconstruction_distribution["parameters"][parameter]["activation function"]
                p_min, p_max = \
                    self.reconstruction_distribution["parameters"][parameter]["support"]
                x_theta[parameter] = dense_layer(
                    inputs=decoder,
                    num_outputs=self.feature_size,
                    activation_fn=lambda x: tf.clip_by_value(
                        parameter_activation_function(x),
                        p_min + self.epsilon,
                        p_max - self.epsilon),
                    is_training=self.is_training,
                    scope=parameter.upper())

            self.p_x_given_z = self.reconstruction_distribution["class"](x_theta)

            if self.k_max:
                x_logits = dense_layer(inputs=decoder,
                                       num_outputs=self.feature_size * self.k_max,
                                       activation_fn=None,
                                       is_training=self.is_training,
                                       scope="P_K")
                x_logits = tf.reshape(x_logits, [-1, self.feature_size, self.k_max])
                self.p_x_given_z = Categorized(dist=self.p_x_given_z,
                                               cat=Categorical(logits=x_logits))

            self.x_tilde_mean = self.p_x_given_z.mean()

        # Add histogram summaries for the trainable parameters
        for parameter in tf.trainable_variables():
            tf.summary.histogram(parameter.name, parameter)

    def loss(self):
        # Recognition prior
        p_z_mu = tf.constant(0.0, dtype=tf.float32)
        p_z_sigma = tf.constant(1.0, dtype=tf.float32)
        p_z = Normal(p_z_mu, p_z_sigma)

        # Loss

        ## Reconstruction error
        log_p_x_given_z = tf.reduce_mean(
            tf.reduce_sum(self.p_x_given_z.log_prob(self.x), axis=1),
            name='reconstruction_error')
        tf.add_to_collection('losses', log_p_x_given_z)

        ## Regularisation
        KL_qp = tf.reduce_mean(
            tf.reduce_sum(kl(self.q_z_given_x, p_z), axis=1),
            name="kl_divergence")
        tf.add_to_collection('losses', KL_qp)

        # Averaging over samples.
        self.loss_op = tf.subtract(log_p_x_given_z, KL_qp, name='lower_bound')
        tf.add_to_collection('losses', self.loss_op)

        # Add scalar summaries for the losses
        for l in tf.get_collection('losses'):
            tf.summary.scalar(l.op.name, l)

    def training(self):
        # Create the gradient descent optimiser with the given learning rate.
        def setupTraining():
            # Optimizer and training objective of negative loss
            optimiser = tf.train.AdamOptimizer(self.learning_rate)

            # Create a variable to track the global step.
            self.global_step = tf.Variable(0, name='global_step', trainable=False)

            # Use the optimiser to apply the gradients that minimize the loss
            # (and also increment the global step counter) as a single training step.
            self.train_op = optimiser.minimize(-self.loss_op, global_step=self.global_step)

        # Make sure that the updates of the moving_averages in batch_norm
        # layers are performed before the train_step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            with tf.control_dependencies([updates]):
                setupTraining()
        else:
            setupTraining()

    def train(self, train_data, valid_data, number_of_epochs=50, batch_size=100,
              learning_rate=1e-3, log_directory=None, reset_training=False):

        if self.use_count_sum:
            n_train = train_data.counts.sum(axis=1).reshape(-1, 1)
            n_valid = valid_data.counts.sum(axis=1).reshape(-1, 1)

        if reset_training and os.path.exists(log_directory):
            for f in os.listdir(log_directory):
                os.remove(os.path.join(log_directory, f))
            os.rmdir(log_directory)

        # Train
        M = train_data.number_of_examples

        self.saver = tf.train.Saver()
        checkpoint_file = os.path.join(log_directory, 'model.ckpt')

        with tf.Session() as session:
            summary_writer = tf.summary.FileWriter(log_directory, session.graph)

            session.run(tf.global_variables_initializer())

            # Print out the defined graph
            # print("The inference graph:")
            # print(tf.get_default_graph().as_graph_def())

            # train_losses, valid_losses = [], []

            feed_dict_train = {self.x: train_data.counts, self.is_training: False}
            feed_dict_valid = {self.x: valid_data.counts, self.is_training: False}
            if self.use_count_sum:
                feed_dict_train[self.n] = n_train
                feed_dict_valid[self.n] = n_valid

            for epoch in range(number_of_epochs):
                shuffled_indices = numpy.random.permutation(M)

                for i in range(0, M, batch_size):
                    step = session.run(self.global_step)

                    start_time = time()

                    # Feeding in batch to model
                    subset = shuffled_indices[i:(i + batch_size)]
                    batch = train_data.counts[subset]
                    feed_dict_batch = {self.x: batch,
                                       self.is_training: True,
                                       self.learning_rate: learning_rate}

                    # Adding the sum of counts per cell to the generator after the sample layer.
                    if self.use_count_sum:
                        feed_dict_batch[self.n] = n_train[subset]

                    # Run the stochastic batch training operation.
                    _, batch_loss = session.run([self.train_op, self.loss_op],
                                                feed_dict=feed_dict_batch)

                    # Duration of one training step.
                    duration = time() - start_time

                    # Evaluation printout and TensorBoard summary
                    if step % 10 == 0:
                        print('Step {:d}: loss = {:.2f} ({:.3f} sec)'.format(
                            int(step), batch_loss, duration))
                        summary_str = session.run(self.summary, feed_dict=feed_dict_batch)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                # Saving model parameters
                print('Checkpoint reached: Saving model')
                self.saver.save(session, checkpoint_file)
                print('Done saving model')

                # Evaluation
                print('Evaluating epoch {:d}'.format(epoch))

                train_loss = 0
                for i in range(0, M, batch_size):
                    subset = slice(i, (i + batch_size))
                    batch = train_data.counts[subset]
                    feed_dict_batch = {self.x: batch, self.is_training: False}
                    if self.use_count_sum:
                        feed_dict_batch[self.n] = n_train[subset]
                    train_loss += session.run(self.loss_op, feed_dict=feed_dict_batch)
                train_loss /= M / batch_size
                print('Done evaluating training set')

                valid_loss = 0
                for i in range(0, valid_data.number_of_examples, batch_size):
                    subset = slice(i, (i + batch_size))
                    batch = valid_data.counts[subset]
                    feed_dict_batch = {self.x: batch, self.is_training: False}
                    if self.use_count_sum:
                        feed_dict_batch[self.n] = n_valid[subset]
                    valid_loss += session.run(self.loss_op, feed_dict=feed_dict_batch)
                valid_loss /= valid_data.number_of_examples / batch_size
                print('Done evaluating validation set')

                print("Epoch %d: ELBO: %g (Train), %g (Valid)"
                      % (epoch + 1, train_loss, valid_loss))

    def evaluate(self, test_set, batch_size=100, log_directory=None):
        checkpoint = tf.train.get_checkpoint_state(log_directory)

        if self.use_count_sum:
            n_test = test_set.counts.sum(axis=1).reshape(-1, 1)

        with tf.Session() as session:
            if checkpoint and checkpoint.model_checkpoint_path:
                self.saver.restore(session, checkpoint.model_checkpoint_path)

            lower_bound_test = 0
            recon_mean_test = numpy.empty(
                [test_set.number_of_examples, test_set.number_of_features])
            z_mu_test = numpy.empty([test_set.number_of_examples, self.latent_size])

            for i in range(0, test_set.number_of_examples, batch_size):
                subset = slice(i, (i + batch_size))
                batch = test_set.counts[subset]
                feed_dict_batch = {self.x: batch, self.is_training: False}
                if self.use_count_sum:
                    feed_dict_batch[self.n] = n_test[subset]

                lower_bound_batch, recon_mean_batch, z_mu_batch = session.run(
                    [self.loss_op, self.x_tilde_mean, self.z_mean],
                    feed_dict=feed_dict_batch)

                lower_bound_test += lower_bound_batch
                recon_mean_test[subset] = recon_mean_batch
                z_mu_test[subset] = z_mu_batch

            lower_bound_test /= test_set.number_of_examples / batch_size

            metrics_test = {"LL_test": lower_bound_test}
            print(metrics_test)

            return recon_mean_test, z_mu_test, metrics_test
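# Hypothetical usage sketch for the class above; "poisson", train_data and
# valid_data are placeholders and would have to match the distributions
# registry and the train()/evaluate() interface assumed by the original code.
vae = VariationalAutoEncoder(feature_size=5000,
                             latent_size=50,
                             hidden_sizes=[500, 250],
                             reconstruction_distribution="poisson")
vae.train(train_data, valid_data, number_of_epochs=50, batch_size=100,
          learning_rate=1e-3, log_directory="log/" + vae.name)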
def gaussian_layer(x, in_dim, out_dim, scope, activation_fn=tf.nn.relu,
                   reuse=False, use_mean=False, store=False, use_stored=False,
                   prior_stddev=1.0, l2_const=0.0):
    """Single layer of fully-connected units whose weights follow a unit Gaussian prior.

    Args:
        x: batch of input
        in_dim: input dimension
        out_dim: output dimension
        scope: tensorflow variable scope name
        activation_fn: activation function
        use_mean: use the mean of the approximate posterior, instead of sampling
        store: store the sampled weights for later reuse
        use_stored: use the previously stored weight sample
    Returns:
        output and KL of the weights for the layer
    """
    prior_var = prior_stddev**2
    with tf.variable_scope(scope, reuse=reuse):
        w_mean = tf.get_variable('w_mean', shape=[in_dim, out_dim], initializer=xi())
        w_row = tf.get_variable('w_row', shape=[in_dim, out_dim], initializer=ni(-3.0, 0.1))
        w_stddev = tf.nn.softplus(w_row, name='w_std') + eps

        w_dist = Normal([0.0] * in_dim * out_dim, [1.0] * in_dim * out_dim)
        w_std_sample = tf.reshape(w_dist.sample(), [in_dim, out_dim], name='w_std_sample')

        # reparametrization: w = w_mean + eps * w_stddev
        w_sample = w_mean + w_std_sample * w_stddev

        b = tf.get_variable('b', shape=[out_dim], dtype=tf.float32, initializer=xi())

        # to store the previous theta value
        w_last = tf.get_variable('w_last', initializer=tf.zeros([in_dim, out_dim]), trainable=False)

        if use_mean:
            out = activation_fn(tf.matmul(x, w_mean) + b, name='activation')
            return out, 0.0
        else:
            if store:
                store_op = tf.assign(w_last, w_sample)
                with tf.control_dependencies([store_op]):
                    out = activation_fn(tf.matmul(x, w_sample) + b, name='activation')
            else:
                if use_stored:
                    out = activation_fn(tf.matmul(x, w_last) + b, name='activation')
                else:
                    out = activation_fn(tf.matmul(x, w_sample) + b, name='activation')

            D = in_dim * out_dim
            # closed-form KL(q(w) || p(w)) between diagonal Gaussians
            kl = tf.log(prior_stddev) * D - \
                tf.reduce_sum(tf.log(w_stddev + eps)) + \
                0.5 * (-D + (tf.reduce_sum(w_stddev**2) + tf.reduce_sum(w_mean**2)) / prior_var)
            return out, kl
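# Quick NumPy check (a sketch, not part of the original layer) that the
# closed-form expression above is the standard KL divergence between diagonal
# Gaussians, KL( N(mu, sigma^2) || N(0, prior_stddev^2) ), summed over all
# in_dim * out_dim weights.
import numpy as np


def kl_closed_form(mu, sigma, prior_stddev=1.0):
    prior_var = prior_stddev ** 2
    return np.sum(np.log(prior_stddev / sigma)
                  + (sigma ** 2 + mu ** 2) / (2.0 * prior_var)
                  - 0.5)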
def fit(self, data, epochs=1000, max_seconds=600, activation=tf.nn.elu,
        batch_norm_decay=0.9, learning_rate=1e-5, batch_sz=1024,
        adapt_lr=False, print_progress=True, show_fig=True):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # static features
    X = data['X_train_static_mins']
    N, D = X.shape
    self.X = tf.placeholder(tf.float32, shape=(None, D), name='X')

    # timeseries features
    X_time = data['X_train_time_0']
    T1, N1, D1 = X_time.shape
    assert N == N1
    self.X_time = tf.placeholder(tf.float32, shape=(T1, None, D1), name='X_time')

    self.train = tf.placeholder(tf.bool, shape=(), name='train')
    self.rnn_keep_p_encode = tf.placeholder(tf.float32, shape=(), name='rnn_keep_p_encode')
    self.rnn_keep_p_decode = tf.placeholder(tf.float32, shape=(), name='rnn_keep_p_decode')
    adp_learning_rate = tf.placeholder(tf.float32, shape=(), name='adp_learning_rate')

    he_init = variance_scaling_initializer()
    bn_params = {
        'is_training': self.train,
        'decay': batch_norm_decay,
        'updates_collections': None
    }

    latent_size = self.encoder_layer_sizes[-1]

    inputs = self.X
    with tf.variable_scope('static_encoder'):
        for layer_size, keep_p in zip(self.encoder_layer_sizes[:-1], self.encoder_dropout[:-1]):
            inputs = dropout(inputs, keep_p, is_training=self.train)
            inputs = fully_connected(inputs, layer_size,
                                     weights_initializer=he_init,
                                     activation_fn=activation,
                                     normalizer_fn=batch_norm,
                                     normalizer_params=bn_params)

    if self.rnn_encoder_layer_sizes:
        with tf.variable_scope('rnn_encoder'):
            rnn_cell = MultiRNNCell([
                LayerNormBasicLSTMCell(s, activation=tf.tanh,
                                       dropout_keep_prob=self.rnn_encoder_dropout)
                for s in self.rnn_encoder_layer_sizes
            ])
            time_inputs, states = tf.nn.dynamic_rnn(rnn_cell, self.X_time,
                                                    swap_memory=True,
                                                    time_major=True,
                                                    dtype=tf.float32)
            time_inputs = tf.transpose(time_inputs, perm=(1, 0, 2))
            time_inputs = tf.reshape(time_inputs,
                                     shape=(-1, self.rnn_encoder_layer_sizes[-1] * T1))
            inputs = tf.concat([inputs, time_inputs], axis=1)

    with tf.variable_scope('latent_space'):
        inputs = dropout(inputs, self.encoder_dropout[-1], is_training=self.train)
        loc = fully_connected(inputs, latent_size,
                              weights_initializer=he_init,
                              activation_fn=None,
                              normalizer_fn=batch_norm,
                              normalizer_params=bn_params)
        scale = fully_connected(inputs, latent_size,
                                weights_initializer=he_init,
                                activation_fn=tf.nn.softplus,
                                normalizer_fn=batch_norm,
                                normalizer_params=bn_params)
        standard_normal = Normal(loc=np.zeros(latent_size, dtype=np.float32),
                                 scale=np.ones(latent_size, dtype=np.float32))
        e = standard_normal.sample(tf.shape(loc)[0])
        outputs = e * scale + loc

    static_output_size = self.decoder_layer_sizes[0]
    if self.rnn_decoder_layer_sizes:
        time_output_size = self.rnn_decoder_layer_sizes[0] * T1
        output_size = static_output_size + time_output_size
    else:
        output_size = static_output_size

    outputs = fully_connected(outputs, output_size,
                              weights_initializer=he_init,
                              activation_fn=activation,
                              normalizer_fn=batch_norm,
                              normalizer_params=bn_params)

    if self.rnn_decoder_layer_sizes:
        outputs, time_outputs = tf.split(outputs, [static_output_size, time_output_size], axis=1)

    with tf.variable_scope('static_decoder'):
        for layer_size, keep_p in zip(self.decoder_layer_sizes, self.decoder_dropout[:-1]):
            outputs = dropout(outputs, keep_p, is_training=self.train)
            outputs = fully_connected(outputs, layer_size,
                                      weights_initializer=he_init,
                                      activation_fn=activation,
                                      normalizer_fn=batch_norm,
                                      normalizer_params=bn_params)
        outputs = dropout(outputs, self.decoder_dropout[-1], is_training=self.train)
        outputs = fully_connected(outputs, D,
                                  weights_initializer=he_init,
                                  activation_fn=None,
                                  normalizer_fn=batch_norm,
                                  normalizer_params=bn_params)
        X_hat = Bernoulli(logits=outputs)
        self.posterior_predictive = X_hat.sample()
        self.posterior_predictive_probs = tf.nn.sigmoid(outputs)

    if self.rnn_decoder_layer_sizes:
        with tf.variable_scope('rnn_decoder'):
            self.rnn_decoder_layer_sizes.append(D1)
            time_output_size = self.rnn_decoder_layer_sizes[0]
            time_outputs = tf.reshape(time_outputs, shape=(-1, T1, time_output_size))
            time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
            rnn_cell = MultiRNNCell([
                LayerNormBasicLSTMCell(s, activation=tf.tanh,
                                       dropout_keep_prob=self.rnn_decoder_dropout)
                for s in self.rnn_decoder_layer_sizes
            ])
            time_outputs, states = tf.nn.dynamic_rnn(rnn_cell, time_outputs,
                                                     swap_memory=True,
                                                     time_major=True,
                                                     dtype=tf.float32)
            time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
            time_outputs = tf.reshape(time_outputs, shape=(-1, T1 * D1))
            X_hat_time = Bernoulli(logits=time_outputs)
            posterior_predictive_time = X_hat_time.sample()
            posterior_predictive_time = tf.reshape(posterior_predictive_time, shape=(-1, T1, D1))
            self.posterior_predictive_time = tf.transpose(posterior_predictive_time, perm=(1, 0, 2))
            self.posterior_predictive_probs_time = tf.nn.sigmoid(time_outputs)

    kl_div = -tf.log(scale) + 0.5 * (scale**2 + loc**2) - 0.5
    kl_div = tf.reduce_sum(kl_div, axis=1)
    expected_log_likelihood = tf.reduce_sum(X_hat.log_prob(self.X), axis=1)

    X_time_trans = tf.transpose(self.X_time, perm=(1, 0, 2))
    X_time_reshape = tf.reshape(X_time_trans, shape=(-1, T1 * D1))

    if self.rnn_encoder_layer_sizes:
        expected_log_likelihood_time = tf.reduce_sum(X_hat_time.log_prob(X_time_reshape), axis=1)
        elbo = -tf.reduce_sum(expected_log_likelihood + expected_log_likelihood_time - kl_div)
    else:
        elbo = -tf.reduce_sum(expected_log_likelihood - kl_div)

    train_op = tf.train.AdamOptimizer(learning_rate=adp_learning_rate).minimize(elbo)

    tf.summary.scalar('elbo', elbo)

    if self.save_file:
        saver = tf.train.Saver()

    if self.tensorboard:
        for v in tf.trainable_variables():
            tf.summary.histogram(v.name, v)
        train_merge = tf.summary.merge_all()
        writer = tf.summary.FileWriter(self.tensorboard)

    self.init_op = tf.global_variables_initializer()

    n = 0
    n_batches = N // batch_sz
    costs = list()
    min_cost = np.inf
    t0 = dt.now()

    with tf.Session() as sess:
        sess.run(self.init_op)
        for epoch in range(epochs):
            idxs = shuffle(range(N))
            X_train = X[idxs]
            X_train_time = X_time[:, idxs]
            for batch in range(n_batches):
                n += 1
                X_batch = X_train[batch * batch_sz:(batch + 1) * batch_sz]
                X_batch_time = X_train_time[:, batch * batch_sz:(batch + 1) * batch_sz]
                sess.run(train_op,
                         feed_dict={
                             self.X: X_batch,
                             self.X_time: X_batch_time,
                             self.rnn_keep_p_encode: self.rnn_encoder_dropout,
                             self.rnn_keep_p_decode: self.rnn_decoder_dropout,
                             self.train: True,
                             adp_learning_rate: learning_rate
                         })
                if n % 100 == 0 and print_progress:
                    cost = sess.run(elbo,
                                    feed_dict={
                                        self.X: X,
                                        self.X_time: X_time,
                                        self.rnn_keep_p_encode: 1.0,
                                        self.rnn_keep_p_decode: 1.0,
                                        self.train: False
                                    })
                    cost /= N
                    costs.append(cost)
                    if adapt_lr and epoch > 0:
                        if cost < min_cost:
                            min_cost = cost
                        elif cost > min_cost * 1.01:
                            learning_rate *= 0.75
                            if print_progress:
                                print('Updating Learning Rate', learning_rate)
                    print('Epoch:', epoch, 'Batch:', batch, 'Cost:', cost)
                    if self.tensorboard:
                        train_sum = sess.run(train_merge,
                                             feed_dict={
                                                 self.X: X,
                                                 self.X_time: X_time,
                                                 self.rnn_keep_p_encode: 1.0,
                                                 self.rnn_keep_p_decode: 1.0,
                                                 self.train: False
                                             })
                        writer.add_summary(train_sum, n)
            seconds = (dt.now() - t0).seconds
            if seconds > max_seconds:
                if print_progress:
                    print('Breaking after', seconds, 'seconds')
                break
        if self.save_file:
            saver.save(sess, self.save_file)
        if self.tensorboard:
            writer.add_graph(sess.graph)

    if show_fig:
        plt.plot(costs)
        plt.title('Costs and Scores')
        plt.show()
def step(self, time, inputs, input_latent_sample, states, use_inference, name=None):
    """Perform a decoding step.

    Args:
        time: scalar `int32`.
        inputs: A (structure of) input tensors.
        input_latent_sample: Can override sampling of new latent.
        states: A (structure of) state tensors and TensorArrays.
        use_inference: If True, overrides checks for inference or prior network
            usage and always uses the inference network.
        name: Name scope for any created operations.
    Returns:
        `output_frame, inference_dist, prior_dist, states, z_sample`.
    """
    cell_outputs, cell_states = dict(), dict()
    if self._prev_inputs is None:
        raise ValueError("Need previous input for VariationalDecoder!")
    with ops.name_scope(name, "VariationalDecoderStep", (time, inputs, states)):
        if input_latent_sample is None:
            # predict inference distribution from current frame if any
            if inputs is not None:
                cell_outputs['inference'], cell_states['inference'] = \
                    self._cells['inference'](self._maybe_encode_inputs(inputs), states['inference'])
            else:
                cell_outputs['inference'], cell_states['inference'] = None, None

            # predict learned prior from previous frame
            if not self._fixed_prior:
                cell_outputs['prior'], cell_states['prior'] = \
                    self._cells['prior'](self._maybe_encode_inputs(self._prev_inputs), states['prior'])
            else:
                means = tf.zeros([self._batch_size, self._sample_dim])
                log_std_dev = tf.log(tf.constant(1.0, shape=[self._batch_size, self._sample_dim]))
                cell_outputs['prior'] = tf.concat([means, log_std_dev], axis=1)

            # sample from inference or prior distribution
            if use_inference:
                means = cell_outputs['inference'][..., :self._sample_dim]
                std_dev = tf.exp(cell_outputs['inference'][..., self._sample_dim:])
            else:
                means = cell_outputs['prior'][..., :self._sample_dim]
                std_dev = tf.exp(cell_outputs['prior'][..., self._sample_dim:])
            z_dists = Normal(loc=means, scale=std_dev)
            z_sample = tf.squeeze(z_dists.sample([1]))  # sample one sample from each distribution
            if tf.flags.FLAGS.trajectory_space and not tf.flags.FLAGS.trajectory_autoencoding:
                z_sample = tf.concat([
                    z_sample,
                    tf.zeros(z_sample.get_shape().as_list()[:-1] + [1], dtype=tf.float32)
                ], axis=-1)
        else:
            z_sample = input_latent_sample
            cell_outputs['inference'] = None
            cell_outputs['prior'] = None

        # reconstruct output with LSTM and decoder
        if self._use_cdna_model:
            decoder_input = [self._prev_inputs, self._first_image, z_sample, self._is_training]
        else:
            decoder_input = tf.concat((self._prev_inputs, z_sample), axis=-1)
        cell_outputs['output'], cell_states['output'] = \
            self._cells['output'](decoder_input, states['output'])
        if self._output_layer is not None:
            cell_outputs['output'] = self._output_layer(cell_outputs['output'])

    return cell_outputs['output'], cell_outputs['inference'], \
        cell_outputs['prior'], cell_states, z_sample
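# Sketch of the latent KL term such a variational decoder is typically trained
# with, using the inference/prior outputs returned by step(). The assumed layout
# is [means, log_std_devs] along the last axis, as in make_dists_and_sample above;
# tf.contrib.distributions.kl_divergence is the TF >= 1.4 name (older releases
# expose it as kl).
import tensorflow as tf
from tensorflow.contrib.distributions import Normal


def latent_kl(inference_out, prior_out, sample_dim):
    q = Normal(loc=inference_out[..., :sample_dim],
               scale=tf.exp(inference_out[..., sample_dim:]))
    p = Normal(loc=prior_out[..., :sample_dim],
               scale=tf.exp(prior_out[..., sample_dim:]))
    return tf.reduce_sum(tf.contrib.distributions.kl_divergence(q, p), axis=-1)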