def _run_hmc_interleaved(target_cp, target_ncp, param_shapes,
                         step_size_cp=0.1, step_size_ncp=0.1,
                         to_centered=noop, to_noncentered=noop,
                         num_samples=2000, burnin=1000, num_leapfrog_steps=4):
  g = tf.Graph()
  with g.as_default():
    inner_kernel_cp = mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target_cp,
        step_size=step_size_cp,
        num_leapfrog_steps=num_leapfrog_steps)
    inner_kernel_ncp = mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target_ncp,
        step_size=step_size_ncp,
        num_leapfrog_steps=num_leapfrog_steps)
    kernel = interleaved.Interleaved(inner_kernel_cp, inner_kernel_ncp,
                                     to_centered, to_noncentered)

    states, kernel_results = mcmc.sample_chain(
        num_results=num_samples,
        num_burnin_steps=burnin,
        current_state=[
            tf.zeros(param_shapes[param]) for param in param_shapes.keys()
        ],
        kernel=kernel)

    ess_op = tfp.mcmc.effective_sample_size(states)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
      init.run()
      start_time = time.time()
      samples, is_accepted, ess = sess.run(
          [states, kernel_results.is_accepted, ess_op])
      sampling_time = time.time() - start_time

    results = collections.OrderedDict()
    results['samples'] = collections.OrderedDict()
    for i, param in enumerate(param_shapes.keys()):
      results['samples'][param] = samples[i]

    results['is_accepted'] = is_accepted
    results['acceptance_rate'] = 'NA'
    results['ess'] = ess
    results['sampling_time'] = sampling_time
    results['step_size'] = 'NA'

    return results
def run_chain_fn():
  return sample_chain(
      num_results=num_samples,
      num_burnin_steps=num_burnin_steps,
      current_state=mcmc_helper.current_state,
      kernel=mcmc,
      trace_fn=lambda _, pkr: pkr.inner_results.is_accepted)
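# Illustrative aside (not part of the function above): `trace_fn` controls which
# pieces of the kernel results `sample_chain` records alongside the states. A
# minimal, self-contained sketch on an assumed standard-normal toy target:
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

toy_kernel = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=tfd.Normal(loc=0., scale=1.).log_prob,
    step_size=0.5,
    num_leapfrog_steps=3)

toy_samples, toy_is_accepted = tfp.mcmc.sample_chain(
    num_results=500,
    num_burnin_steps=200,
    current_state=tf.zeros([]),
    kernel=toy_kernel,
    # For a bare HMC kernel the acceptance flag lives at `pkr.is_accepted`; for a
    # wrapped kernel (as above) it sits under `pkr.inner_results.is_accepted`.
    trace_fn=lambda _, pkr: pkr.is_accepted)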
def mcmc(self, mcmc_samples, num_burnin_steps, step_size,
         num_leapfrog_steps=3, initial_state=None, thinning=2):
  # Performs the sampling for the posterior distributions of the hyperparameters.
  # Inputs:
  #   mcmc_samples := number of samples to collect for the hyperparameters
  #   num_burnin_steps := number of samples to discard
  #   step_size := step size for the HMC sampler
  #   num_leapfrog_steps := number of leapfrog steps for the HMC sampler
  #   initial_state := list ([beta, varm, loc]) of tensors providing the initial
  #                    state for the HMC sampler
  # Outputs:
  #   hyperpar_samples := list [loc_samples_, varm_samples_, beta_samples_] of samples
  #                       for the posterior distribution of the hyperparameters
  #   acceptance_rate_ := acceptance rate of the sampling
  unnormalized_posterior_log_prob = functools.partial(
      self.joint_log_prob, self.noise)

  #------- Unconstrained representation ---------
  unconstraining_bijectors = [tfb.Softplus(), tfb.Softplus(), tfb.Identity()]

  if initial_state is None:
    beta = 1.2 * tf.ones(self.dim_input, tf.float32)
    varm = 0.8
    loc = 0.0
    initial_state = [beta, varm, loc]

  #---- Setting up the mcmc sampler
  [beta_samples, varm_samples, loc_samples], kernel_results = sample_chain(
      num_results=mcmc_samples,
      num_burnin_steps=num_burnin_steps,
      current_state=initial_state,
      num_steps_between_results=thinning,
      kernel=TransformedTransitionKernel(
          inner_kernel=HamiltonianMonteCarlo(
              target_log_prob_fn=unnormalized_posterior_log_prob,
              step_size=step_size,
              num_leapfrog_steps=num_leapfrog_steps),
          bijector=unconstraining_bijectors))

  acceptance_rate = tf.reduce_mean(
      tf.to_float(kernel_results.inner_results.is_accepted))

  with tf.Session() as sess:
    [acceptance_rate_, loc_samples_, varm_samples_, beta_samples_] = sess.run(
        [acceptance_rate, loc_samples, varm_samples, beta_samples])

  print('Acceptance rate of the HMC sampling:', acceptance_rate_)
  hyperpar_samples = [loc_samples_, varm_samples_, beta_samples_]
  return hyperpar_samples, acceptance_rate_
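# Illustrative aside (not taken from the class above): `TransformedTransitionKernel`
# with a `Softplus` bijector lets HMC move in unconstrained space while the target
# density is defined over a positive parameter. The Gamma target is an assumed toy.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

toy_target = tfd.Gamma(concentration=2., rate=1.)

constrained_kernel = tfp.mcmc.TransformedTransitionKernel(
    inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=toy_target.log_prob,
        step_size=0.1,
        num_leapfrog_steps=3),
    # Softplus maps the unconstrained chain back to the positive reals.
    bijector=tfb.Softplus())

positive_samples, _ = tfp.mcmc.sample_chain(
    num_results=1000,
    num_burnin_steps=500,
    current_state=tf.ones([]),  # The initial state is given in constrained space.
    kernel=constrained_kernel)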
def run_hmc():
  return mcmc.sample_chain(
      num_results=num_results,
      current_state=initial_state,
      num_burnin_steps=num_warmup_steps,
      kernel=mcmc.DualAveragingStepSizeAdaptation(
          inner_kernel=mcmc.TransformedTransitionKernel(
              inner_kernel=mcmc.HamiltonianMonteCarlo(
                  target_log_prob_fn=target_log_prob_fn,
                  step_size=initial_step_size,
                  num_leapfrog_steps=num_leapfrog_steps,
                  state_gradients_are_stopped=True),
              bijector=[param.bijector for param in model.parameters]),
          num_adaptation_steps=int(num_warmup_steps * 0.8)),
      seed=seed())
def run_hmc():
  return mcmc.sample_chain(
      num_results=num_results,
      current_state=initial_state,
      num_burnin_steps=num_warmup_steps,
      kernel=mcmc.SimpleStepSizeAdaptation(
          inner_kernel=mcmc.TransformedTransitionKernel(
              inner_kernel=mcmc.HamiltonianMonteCarlo(
                  target_log_prob_fn=target_log_prob_fn,
                  step_size=initial_step_size,
                  num_leapfrog_steps=num_leapfrog_steps,
                  state_gradients_are_stopped=True,
                  seed=seed()),
              bijector=[param.bijector for param in model.parameters]),
          num_adaptation_steps=int(num_warmup_steps * 0.8),
          adaptation_rate=tf.convert_to_tensor(
              value=0.1, dtype=initial_state[0].dtype)),
      parallel_iterations=1 if seed is not None else 10)
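# The two variants above differ only in the step-size adaptation wrapper:
# `DualAveragingStepSizeAdaptation` adapts via dual averaging, while
# `SimpleStepSizeAdaptation` multiplicatively nudges the step size toward the
# target acceptance probability (0.75 by default for both). A minimal sketch on an
# assumed toy target, wrapping a bare HMC kernel:
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

num_warmup = 500
adaptive_kernel = tfp.mcmc.DualAveragingStepSizeAdaptation(
    inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=tfd.Normal(loc=0., scale=1.).log_prob,
        step_size=1.0,
        num_leapfrog_steps=3),
    # Adapt only during warm-up so the post-adaptation chain is a valid MCMC chain.
    num_adaptation_steps=int(num_warmup * 0.8))

adapted_samples, _ = tfp.mcmc.sample_chain(
    num_results=1000,
    num_burnin_steps=num_warmup,
    current_state=tf.zeros([]),
    kernel=adaptive_kernel)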
def hmc(target, model, model_config, step_size_init, initial_states, reparam):
  """Runs HMC to sample from the given target distribution."""
  if reparam == 'CP':
    to_centered = lambda x: x
  elif reparam == 'NCP':
    to_centered = model_config.to_centered
  else:
    to_centered = model_config.make_to_centered(**reparam)

  model_config = model_config._replace(to_centered=to_centered)

  initial_states = list(initial_states)  # Variational samples.
  shapes = [s[0].shape for s in initial_states]

  vectorized_target = vectorize_log_joint_fn(target)

  per_chain_initial_step_sizes = [
      np.array(step_size_init[i] * np.ones(initial_states[i].shape) /
               (FLAGS.num_leapfrog_steps / 4.)**2).astype(np.float32)
      for i in range(len(step_size_init))
  ]

  kernel = mcmc.SimpleStepSizeAdaptation(
      inner_kernel=mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=vectorized_target,
          step_size=per_chain_initial_step_sizes,
          num_leapfrog_steps=FLAGS.num_leapfrog_steps),
      adaptation_rate=0.05,
      num_adaptation_steps=FLAGS.num_adaptation_steps)

  states_orig, kernel_results = mcmc.sample_chain(
      num_results=FLAGS.num_samples,
      num_burnin_steps=FLAGS.num_burnin_steps,
      current_state=initial_states,
      kernel=kernel,
      num_steps_between_results=1)

  states_transformed = transform_mcmc_states(states_orig, to_centered)
  ess = tfp.mcmc.effective_sample_size(states_transformed)

  return states_orig, kernel_results, states_transformed, ess
def fit_with_hmc(model, observed_time_series, num_results=100, num_warmup_steps=50, num_leapfrog_steps=15, initial_state=None, initial_step_size=None, chain_batch_shape=(), num_variational_steps=150, variational_optimizer=None, seed=None, name=None): """Draw posterior samples using Hamiltonian Monte Carlo (HMC). Markov chain Monte Carlo (MCMC) methods are considered the gold standard of Bayesian inference; under suitable conditions and in the limit of infinitely many draws they generate samples from the true posterior distribution. HMC [1] uses gradients of the model's log-density function to propose samples, allowing it to exploit posterior geometry. However, it is computationally more expensive than variational inference and relatively sensitive to tuning. This method attempts to provide a sensible default approach for fitting StructuralTimeSeries models using HMC. It first runs variational inference as a fast posterior approximation, and initializes the HMC sampler from the variational posterior, using the posterior standard deviations to set per-variable step sizes (equivalently, a diagonal mass matrix). During the warmup phase, it adapts the step size to target an acceptance rate of 0.75, which is thought to be in the desirable range for optimal mixing [2]. Args: model: An instance of `StructuralTimeSeries` representing a time-series model. This represents a joint distribution over time-series and their parameters with batch shape `[b1, ..., bN]`. observed_time_series: `float` `Tensor` of shape `concat([sample_shape, model.batch_shape, [num_timesteps, 1]]) where `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. May optionally be an instance of `tfp.sts.MaskedTimeSeries`, which includes a mask `Tensor` to specify timesteps with missing observations. num_results: Integer number of Markov chain draws. Default value: `100`. num_warmup_steps: Integer number of steps to take before starting to collect results. The warmup steps are also used to adapt the step size towards a target acceptance rate of 0.75. Default value: `50`. num_leapfrog_steps: Integer number of steps to run the leapfrog integrator for. Total progress per HMC step is roughly proportional to `step_size * num_leapfrog_steps`. Default value: `15`. initial_state: Optional Python `list` of `Tensor`s, one for each model parameter, representing the initial state(s) of the Markov chain(s). These should have shape `concat([chain_batch_shape, param.prior.batch_shape, param.prior.event_shape])`. If `None`, the initial state is set automatically using a sample from a variational posterior. Default value: `None`. initial_step_size: Python `list` of `Tensor`s, one for each model parameter, representing the step size for the leapfrog integrator. Must broadcast with the shape of `initial_state`. Larger step sizes lead to faster progress, but too-large step sizes make rejection exponentially more likely. If `None`, the step size is set automatically using the standard deviation of a variational posterior. Default value: `None`. chain_batch_shape: Batch shape (Python `tuple`, `list`, or `int`) of chains to run in parallel. Default value: `[]` (i.e., a single chain). num_variational_steps: Python `int` number of steps to run the variational optimization to determine the initial state and step sizes. Default value: `200`. variational_optimizer: Optional `tf.train.Optimizer` instance to use in the variational optimization. 
If `None`, defaults to `tf.train.AdamOptimizer(0.1)`. Default value: `None`. seed: Python integer to seed the random number generator. name: Python `str` name prefixed to ops created by this function. Default value: `None` (i.e., 'fit_with_hmc'). Returns: samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([[num_results], chain_batch_shape, param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. kernel_results: A (possibly nested) `tuple`, `namedtuple` or `list` of `Tensor`s representing internal calculations made within the HMC sampler. #### Examples Assume we've built a structural time-series model: ```python day_of_week = tfp.sts.Seasonal( num_seasons=7, observed_time_series=observed_time_series, name='day_of_week') local_linear_trend = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series, name='local_linear_trend') model = tfp.sts.Sum(components=[day_of_week, local_linear_trend], observed_time_series=observed_time_series) ``` To draw posterior samples using HMC under default settings: ```python samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) samples_, kernel_results_ = sess.run((samples, kernel_results)) print("acceptance rate: {}".format( np.mean(kernel_results_.inner_results.is_accepted, axis=0))) print("posterior means: {}".format( {param.name: np.mean(param_draws, axis=0) for (param, param_draws) in zip(model.parameters, samples_)})) ``` We can also run multiple chains. This may help diagnose convergence issues and allows us to exploit vectorization to draw samples more quickly, although warmup still requires the same number of sequential steps. ```python from matplotlib import pylab as plt samples, kernel_results = tfp.sts.fit_with_hmc( model, observed_time_series, chain_batch_shape=[10]) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) samples_, kernel_results_ = sess.run((samples, kernel_results)) print("acceptance rate: {}".format( np.mean(kernel_results_.inner_results.is_accepted, axis=0))) # Plot the sampled traces for each parameter. If the chains have mixed, their # traces should all cover the same region of state space, frequently crossing # over each other. for (param, param_draws) in zip(model.parameters, samples_): if param.prior.event_shape.ndims > 0: print("Only plotting traces for scalar parameters, skipping {}".format( param.name)) continue plt.figure(figsize=[10, 4]) plt.title(param.name) plt.plot(param_draws) plt.ylabel(param.name) plt.xlabel("HMC step") # Combining the samples from multiple chains into a single dimension allows # us to easily pass sampled parameters to downstream forecasting methods. combined_samples_ = [np.reshape(param_draws, [-1] + list(param_draws.shape[2:])) for param_draws in samples_] ``` For greater flexibility, you may prefer to implement your own sampler using the TensorFlow Probability primitives in `tfp.mcmc`. The following recipe constructs a basic HMC sampler, using a `TransformedTransitionKernel` to incorporate constraints on the parameter space. 
```python transformed_hmc_kernel = mcmc.TransformedTransitionKernel( inner_kernel=mcmc.HamiltonianMonteCarlo( target_log_prob_fn=model.joint_log_prob(observed_time_series), step_size=step_size, num_leapfrog_steps=num_leapfrog_steps, step_size_update_fn=tfp.mcmc.make_simple_step_size_update_policy( num_adaptation_steps=num_adaptation_steps), state_gradients_are_stopped=True, seed=seed), bijector=[param.bijector for param in model.parameters]) # Initialize from a Uniform[-2, 2] distribution in unconstrained space. initial_state = [tfp.sts.sample_uniform_initial_state( param, return_constrained=True) for param in model.parameters] samples, kernel_results = tfp.mcmc.sample_chain( kernel=transformed_hmc_kernel, num_results=num_results, current_state=initial_state, num_burnin_steps=num_warmup_steps) ``` #### References [1]: Radford Neal. MCMC Using Hamiltonian Dynamics. _Handbook of Markov Chain Monte Carlo_, 2011. https://arxiv.org/abs/1206.1901 [2] M.J. Betancourt, Simon Byrne, and Mark Girolami. Optimizing The Integrator Step Size for Hamiltonian Monte Carlo. https://arxiv.org/abs/1411.6669 """ with tf.compat.v1.name_scope( name, 'fit_with_hmc', values=[observed_time_series]) as name: seed = tfd.SeedStream(seed, salt='StructuralTimeSeries_fit_with_hmc') # Initialize state and step sizes from a variational posterior if not # specified. if initial_step_size is None or initial_state is None: # To avoid threading variational distributions through the training # while loop, we build our own copy here. `make_template` ensures # that our variational distributions share the optimized parameters. def make_variational(): return build_factored_variational_loss( model, observed_time_series, init_batch_shape=chain_batch_shape, seed=seed()) make_variational = tf.compat.v1.make_template('make_variational', make_variational) _, variational_distributions = make_variational() minimize_op = _minimize_in_graph( build_loss_fn=lambda: make_variational()[0], # return just the loss. num_steps=num_variational_steps, optimizer=variational_optimizer) with tf.control_dependencies([minimize_op]): if initial_state is None: initial_state = [tf.stop_gradient(d.sample()) for d in variational_distributions.values()] # Set step sizes using the unconstrained variational distribution. if initial_step_size is None: initial_step_size = [ transformed_q.distribution.stddev() for transformed_q in variational_distributions.values()] # Multiple chains manifest as an extra param batch dimension, so we need to # add a corresponding batch dimension to `observed_time_series`. observed_time_series = sts_util.pad_batch_dimension_for_multiple_chains( observed_time_series, model, chain_batch_shape=chain_batch_shape) # When the initial step size depends on a variational optimization, we # can't initialize step size variables before the optimization runs. # Instead we initialize with a dummy value of the appropriate # shape, then wrap the HMC chain with `control_dependencies` to ensure the # variational step sizes are assigned before HMC actually runs. step_size = [ tf.compat.v1.get_variable( initializer=tf.zeros_like( sample_uniform_initial_state( param, init_sample_shape=chain_batch_shape, return_constrained=False)), name='{}_step_size'.format(param.name), trainable=False, use_resource=True) for (param, ss) in zip(model.parameters, initial_step_size) ] step_size_init_op = tf.group([ tf.compat.v1.assign(ss, initial_ss) for (ss, initial_ss) in zip(step_size, initial_step_size) ]) # Run HMC to sample from the posterior on parameters. 
with tf.control_dependencies([step_size_init_op]): samples, kernel_results = mcmc.sample_chain( num_results=num_results, current_state=initial_state, num_burnin_steps=num_warmup_steps, kernel=mcmc.TransformedTransitionKernel( inner_kernel=mcmc.HamiltonianMonteCarlo( target_log_prob_fn=model.joint_log_prob(observed_time_series), step_size=step_size, num_leapfrog_steps=num_leapfrog_steps, step_size_update_fn=mcmc.make_simple_step_size_update_policy( num_adaptation_steps=int(num_warmup_steps * 0.8), decrement_multiplier=0.1, increment_multiplier=0.1), state_gradients_are_stopped=True, seed=seed()), bijector=[param.bijector for param in model.parameters]), parallel_iterations=1 if seed is not None else 10) return samples, kernel_results
def _run_hmc(target, param_shapes, transform=noop, step_size_init=0.1,
             num_samples=2000, burnin=1000, num_adaptation_steps=500,
             num_leapfrog_steps=4):
  g = tf.Graph()
  with g.as_default():
    step_size = [
        tf.get_variable(
            name='step_size' + str(i),
            initializer=np.array(step_size_init[i], dtype=np.float32),
            use_resource=True,  # For TFE compatibility.
            trainable=False) for i in range(len(step_size_init))
    ]

    kernel = mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target,
        step_size=step_size,
        num_leapfrog_steps=num_leapfrog_steps,
        step_size_update_fn=mcmc.make_simple_step_size_update_policy(
            num_adaptation_steps=num_adaptation_steps, target_rate=0.85))

    states, kernel_results = mcmc.sample_chain(
        num_results=num_samples,
        num_burnin_steps=burnin,
        current_state=[
            tf.zeros(param_shapes[param]) for param in param_shapes.keys()
        ],
        kernel=kernel,
        num_steps_between_results=1)

    tr_states = transform_mcmc_states(states, transform)
    ess_op = tfp.mcmc.effective_sample_size(tr_states)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
      init.run()
      start_time = time.time()
      (orig_samples, samples, is_accepted, ess, final_step_size,
       log_accept_ratio) = sess.run([
           states, tr_states, kernel_results.is_accepted, ess_op,
           kernel_results.extra.step_size_assign,
           kernel_results.log_accept_ratio
       ])
      sampling_time = time.time() - start_time

    results = collections.OrderedDict()
    results['samples'] = collections.OrderedDict()
    for i, param in enumerate(param_shapes.keys()):
      results['samples'][param] = samples[i]

    results['orig_samples'] = orig_samples
    results['is_accepted'] = is_accepted
    results['acceptance_rate'] = np.sum(is_accepted) * 100. / float(num_samples)
    results['ess'] = ess
    results['step_size'] = [s[0] for s in final_step_size]
    results['sampling_time'] = sampling_time
    results['log_accept_ratio'] = log_accept_ratio

    return results
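# Hypothetical usage sketch for `_run_hmc` above. The two-block target, the shapes
# in `toy_param_shapes`, and all `toy_*` names are invented for illustration; note
# that `step_size_init` needs one entry per state part, in the same order as
# `param_shapes`.
tfd = tfp.distributions

toy_param_shapes = collections.OrderedDict([('loc', [3]), ('log_scale', [3])])

def toy_target(loc, log_scale):
  # A simple product of standard-normal priors over both blocks.
  return (tf.reduce_sum(tfd.Normal(0., 1.).log_prob(loc)) +
          tf.reduce_sum(tfd.Normal(0., 1.).log_prob(log_scale)))

toy_results = _run_hmc(
    toy_target,
    toy_param_shapes,
    step_size_init=[0.1, 0.1],
    num_samples=500,
    burnin=200,
    num_adaptation_steps=200)
print(toy_results['acceptance_rate'], toy_results['ess'])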
def run_vip_hmc_continuous(model_config, num_samples=2000, burnin=1000, use_iaf_posterior=False, num_leapfrog_steps=4, num_adaptation_steps=500, num_optimization_steps=2000, num_mc_samples=32, tau=1., do_sample=True, description='', experiments_dir=''): tf.reset_default_graph() if use_iaf_posterior: # IAF posterior doesn't give us stddevs for step sizes for HMC (we could # extract them by sampling but I haven't implemented that), and we mostly # care about it for ELBOs anyway. do_sample = False init_val_loc = tf.placeholder('float', shape=()) init_val_scale = tf.placeholder('float', shape=()) (learnable_parameters, learnable_parametrisation, _) = ed_transforms.make_learnable_parametrisation( init_val_loc=init_val_loc, init_val_scale=init_val_scale, tau=tau) def model_vip(*params): with ed.interception(learnable_parametrisation): return model_config.model(*params) log_joint_vip = ed.make_log_joint_fn(model_vip) with ed.tape() as model_tape: _ = model_vip(*model_config.model_args) param_shapes = collections.OrderedDict() target_vip_kwargs = {} for param in model_tape.keys(): if param not in model_config.observed_data.keys(): param_shapes[param] = model_tape[param].shape else: target_vip_kwargs[param] = model_config.observed_data[param] def target_vip(*param_args): i = 0 for param in model_tape.keys(): if param not in model_config.observed_data.keys(): target_vip_kwargs[param] = param_args[i] i = i + 1 return log_joint_vip(*model_config.model_args, **target_vip_kwargs) full_kwargs = collections.OrderedDict(model_config.observed_data.items()) full_kwargs['parameterisation'] = collections.OrderedDict() for k in learnable_parameters.keys(): full_kwargs['parameterisation'][k] = learnable_parameters[k] if use_iaf_posterior: elbo = util.get_iaf_elbo( target_vip, num_mc_samples=num_mc_samples, param_shapes=param_shapes) variational_parameters = {} else: elbo, variational_parameters = util.get_mean_field_elbo( model_vip, target_vip, num_mc_samples=num_mc_samples, model_args=model_config.model_args, vi_kwargs=full_kwargs) vip_step_size_approx = util.get_approximate_step_size( variational_parameters, num_leapfrog_steps) ############################################################################## best_elbo = None model_dir = os.path.join(experiments_dir, str(description + '_' + model_config.model.__name__)) if not tf.gfile.Exists(model_dir): tf.gfile.MakeDirs(model_dir) saver = tf.train.Saver() dir_save = os.path.join(model_dir, 'saved_params_{}'.format(gen_id())) if not tf.gfile.Exists(dir_save): tf.gfile.MakeDirs(dir_save) best_lr = None best_init_loc = None best_init_scale = None learning_rate_ph = tf.placeholder(shape=[], dtype=tf.float32) learning_rate = tf.Variable(learning_rate_ph, trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) train = optimizer.minimize(-elbo) init = tf.global_variables_initializer() learning_rates = [0.003, 0.01, 0.01, 0.1, 0.003, 0.01] if use_iaf_posterior: learning_rates = [3e-5, 1e-4, 3e-4, 1e-4] start_time = time.time() for learning_rate_val in learning_rates: for init_loc in [0.]: #, 10., -10.]: for init_scale in [init_loc]: timeline = [] with tf.Session() as sess: init.run(feed_dict={init_val_loc: init_loc, init_val_scale: init_scale, learning_rate_ph: learning_rate_val}) this_timeline = [] for i in range(num_optimization_steps): _, e = sess.run([train, elbo]) if np.isnan(e): util.print('got NaN in ELBO optimization, stopping...') break this_timeline.append(e) this_elbo = np.mean(this_timeline[-100:]) info_str = ('finished cVIP 
optimization with elbo {} vs ' 'best ELBO {}'.format(this_elbo, best_elbo)) util.print(info_str) if best_elbo is None or best_elbo < this_elbo: best_elbo = this_elbo timeline = this_timeline vals = sess.run(list(learnable_parameters.values())) learned_reparam = collections.OrderedDict( zip(learnable_parameters.keys(), vals)) vals = sess.run(list(variational_parameters.values())) learned_variational_params = collections.OrderedDict( zip(variational_parameters.keys(), vals)) util.print('learned params {}'.format(learned_reparam)) util.print('learned variational params {}'.format( learned_variational_params)) _ = saver.save(sess, dir_save) best_lr = learning_rate best_init_loc = init_loc best_init_scale = init_scale vi_time = time.time() - start_time util.print('BEST: LR={}, init={}, {}'.format(best_lr, best_init_loc, best_init_scale)) util.print('ELBO: {}'.format(best_elbo)) to_centered = model_config.make_to_centered(**learned_reparam) results = collections.OrderedDict() results['elbo'] = best_elbo with tf.Session() as sess: saver.restore(sess, dir_save) results['vp'] = learned_variational_params if do_sample: vip_step_size_init = sess.run(vip_step_size_approx) vip_step_size = [tf.get_variable( name='step_size_vip'+str(i), initializer=np.array(vip_step_size_init[i], dtype=np.float32), use_resource=True, # For TFE compatibility. trainable=False) for i in range(len(vip_step_size_init))] kernel_vip = mcmc.HamiltonianMonteCarlo( target_log_prob_fn=target_vip, step_size=vip_step_size, num_leapfrog_steps=num_leapfrog_steps, step_size_update_fn=mcmc.make_simple_step_size_update_policy( num_adaptation_steps=num_adaptation_steps, target_rate=0.85)) states, kernel_results_vip = mcmc.sample_chain( num_results=num_samples, num_burnin_steps=burnin, current_state=[ tf.zeros(param_shapes[param]) for param in param_shapes.keys() ], kernel=kernel_vip, num_steps_between_results=1) states_vip = transform_mcmc_states(states, to_centered) init_again = tf.global_variables_initializer() init_again.run(feed_dict={ init_val_loc: best_init_loc, init_val_scale: best_init_scale, learning_rate_ph: 1.0}) # learning rate doesn't matter for HMC. ess_vip = tfp.mcmc.effective_sample_size(states_vip) start_time = time.time() samples, is_accepted, ess, ss_vip, log_accept_ratio = sess.run( (states_vip, kernel_results_vip.is_accepted, ess_vip, kernel_results_vip.extra.step_size_assign, kernel_results_vip.log_accept_ratio)) sampling_time = time.time() - start_time results['samples'] = collections.OrderedDict() results['is_accepted'] = is_accepted results['acceptance_rate'] = np.sum(is_accepted) * 100. / float( num_samples) results['ess'] = ess results['sampling_time'] = sampling_time results['log_accept_ratio'] = log_accept_ratio results['step_size'] = [s[0] for s in ss_vip] i = 0 for param in param_shapes.keys(): results['samples'][param] = samples[i] i = i + 1 # end if results['parameterisation'] = collections.OrderedDict() i = 0 for param in param_shapes.keys(): name_a = param[:-5] + 'a' name_b = param[:-5] + 'b' try: results['parameterisation'][name_a] = learned_reparam[name_a] results['parameterisation'][name_b] = learned_reparam[name_b] except KeyError: continue i = i + 1 results['elbo_timeline'] = timeline results['vi_time'] = vi_time results['init_pos'] = best_init_loc return results
def fit_with_hmc(model, observed_time_series, num_results=100, num_warmup_steps=50, num_leapfrog_steps=15, initial_state=None, initial_step_size=None, chain_batch_shape=(), num_variational_steps=150, variational_optimizer=None, seed=None, name=None): """Draw posterior samples using Hamiltonian Monte Carlo (HMC). Markov chain Monte Carlo (MCMC) methods are considered the gold standard of Bayesian inference; under suitable conditions and in the limit of infinitely many draws they generate samples from the true posterior distribution. HMC [1] uses gradients of the model's log-density function to propose samples, allowing it to exploit posterior geometry. However, it is computationally more expensive than variational inference and relatively sensitive to tuning. This method attempts to provide a sensible default approach for fitting StructuralTimeSeries models using HMC. It first runs variational inference as a fast posterior approximation, and initializes the HMC sampler from the variational posterior, using the posterior standard deviations to set per-variable step sizes (equivalently, a diagonal mass matrix). During the warmup phase, it adapts the step size to target an acceptance rate of 0.75, which is thought to be in the desirable range for optimal mixing [2]. Args: model: An instance of `StructuralTimeSeries` representing a time-series model. This represents a joint distribution over time-series and their parameters with batch shape `[b1, ..., bN]`. observed_time_series: `float` `Tensor` of shape `concat([sample_shape, model.batch_shape, [num_timesteps, 1]]) where `sample_shape` corresponds to i.i.d. observations, and the trailing `[1]` dimension may (optionally) be omitted if `num_timesteps > 1`. num_results: Integer number of Markov chain draws. Default value: `100`. num_warmup_steps: Integer number of steps to take before starting to collect results. The warmup steps are also used to adapt the step size towards a target acceptance rate of 0.75. Default value: `50`. num_leapfrog_steps: Integer number of steps to run the leapfrog integrator for. Total progress per HMC step is roughly proportional to `step_size * num_leapfrog_steps`. Default value: `15`. initial_state: Optional Python `list` of `Tensor`s, one for each model parameter, representing the initial state(s) of the Markov chain(s). These should have shape `concat([chain_batch_shape, param.prior.batch_shape, param.prior.event_shape])`. If `None`, the initial state is set automatically using a sample from a variational posterior. Default value: `None`. initial_step_size: Python `list` of `Tensor`s, one for each model parameter, representing the step size for the leapfrog integrator. Must broadcast with the shape of `initial_state`. Larger step sizes lead to faster progress, but too-large step sizes make rejection exponentially more likely. If `None`, the step size is set automatically using the standard deviation of a variational posterior. Default value: `None`. chain_batch_shape: Batch shape (Python `tuple`, `list`, or `int`) of chains to run in parallel. Default value: `[]` (i.e., a single chain). num_variational_steps: Python `int` number of steps to run the variational optimization to determine the initial state and step sizes. Default value: `200`. variational_optimizer: Optional `tf.train.Optimizer` instance to use in the variational optimization. If `None`, defaults to `tf.train.AdamOptimizer(0.1)`. Default value: `None`. seed: Python integer to seed the random number generator. 
name: Python `str` name prefixed to ops created by this function. Default value: `None` (i.e., 'fit_with_hmc'). Returns: samples: Python `list` of `Tensors` representing posterior samples of model parameters, with shapes `[concat([[num_results], chain_batch_shape, param.prior.batch_shape, param.prior.event_shape]) for param in model.parameters]`. kernel_results: A (possibly nested) `tuple`, `namedtuple` or `list` of `Tensor`s representing internal calculations made within the HMC sampler. #### Examples Assume we've built a structural time-series model: ```python day_of_week = tfp.sts.Seasonal( num_seasons=7, observed_time_series=observed_time_series, name='day_of_week') local_linear_trend = tfp.sts.LocalLinearTrend( observed_time_series=observed_time_series, name='local_linear_trend') model = tfp.sts.Sum(components=[day_of_week, local_linear_trend], observed_time_series=observed_time_series) ``` To draw posterior samples using HMC under default settings: ```python samples, kernel_results = tfp.sts.fit_with_hmc(model, observed_time_series) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) samples_, kernel_results_ = sess.run((samples, kernel_results)) print("acceptance rate: {}".format( np.mean(kernel_results_.inner_results.is_accepted, axis=0))) print("posterior means: {}".format( {param.name: np.mean(param_draws, axis=0) for (param, param_draws) in zip(model.parameters, samples_)})) ``` We can also run multiple chains. This may help diagnose convergence issues and allows us to exploit vectorization to draw samples more quickly, although warmup still requires the same number of sequential steps. ```python from matplotlib import pylab as plt samples, kernel_results = tfp.sts.fit_with_hmc( model, observed_time_series, chain_batch_shape=[10]) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) samples_, kernel_results_ = sess.run((samples, kernel_results)) print("acceptance rate: {}".format( np.mean(kernel_results_.inner_results.is_accepted, axis=0))) # Plot the sampled traces for each parameter. If the chains have mixed, their # traces should all cover the same region of state space, frequently crossing # over each other. for (param, param_draws) in zip(model.parameters, samples_): if param.prior.event_shape.ndims > 0: print("Only plotting traces for scalar parameters, skipping {}".format( param.name)) continue plt.figure(figsize=[10, 4]) plt.title(param.name) plt.plot(param_draws) plt.ylabel(param.name) plt.xlabel("HMC step") # Combining the samples from multiple chains into a single dimension allows # us to easily pass sampled parameters to downstream forecasting methods. combined_samples_ = [np.reshape(param_draws, [-1] + list(param_draws.shape[2:])) for param_draws in samples_] ``` For greater flexibility, you may prefer to implement your own sampler using the TensorFlow Probability primitives in `tfp.mcmc`. The following recipe constructs a basic HMC sampler, using a `TransformedTransitionKernel` to incorporate constraints on the parameter space. 
```python transformed_hmc_kernel = mcmc.TransformedTransitionKernel( inner_kernel=mcmc.HamiltonianMonteCarlo( target_log_prob_fn=model.joint_log_prob(observed_time_series), step_size=step_size, num_leapfrog_steps=num_leapfrog_steps, step_size_update_fn=tfp.mcmc.make_simple_step_size_update_policy( num_adaptation_steps=num_adaptation_steps), state_gradients_are_stopped=True, seed=seed), bijector=[param.bijector for param in model.parameters]) # Initialize from a Uniform[-2, 2] distribution in unconstrained space. initial_state = [tfp.sts.sample_uniform_initial_state( param, return_constrained=True) for param in model.parameters] samples, kernel_results = tfp.mcmc.sample_chain( kernel=transformed_hmc_kernel, num_results=num_results, current_state=initial_state, num_burnin_steps=num_warmup_steps) ``` #### References [1]: Radford Neal. MCMC Using Hamiltonian Dynamics. _Handbook of Markov Chain Monte Carlo_, 2011. https://arxiv.org/abs/1206.1901 [2] M.J. Betancourt, Simon Byrne, and Mark Girolami. Optimizing The Integrator Step Size for Hamiltonian Monte Carlo. https://arxiv.org/abs/1411.6669 """ with tf.name_scope(name, 'fit_with_hmc', values=[observed_time_series]) as name: observed_time_series = tf.convert_to_tensor(observed_time_series, name='observed_time_series') seed = tfd.SeedStream(seed, salt='StructuralTimeSeries_fit_with_hmc') # Initialize state and step sizes from a variational posterior if not # specified. if initial_step_size is None or initial_state is None: # To avoid threading variational distributions through the training # while loop, we build our own copy here. `make_template` ensures # that our variational distributions share the optimized parameters. def make_variational(): return build_factored_variational_loss( model, observed_time_series, init_batch_shape=chain_batch_shape, seed=seed()) make_variational = tf.make_template('make_variational', make_variational) _, variational_distributions = make_variational() minimize_op = _minimize_in_graph( build_loss_fn=lambda: make_variational()[0], # return just the loss. num_steps=num_variational_steps, optimizer=variational_optimizer) with tf.control_dependencies([minimize_op]): if initial_state is None: initial_state = [tf.stop_gradient(d.sample()) for d in variational_distributions.values()] # Set step sizes using the unconstrained variational distribution. if initial_step_size is None: initial_step_size = [ transformed_q.distribution.stddev() for transformed_q in variational_distributions.values()] # Multiple chains manifest as an extra param batch dimension, so we need to # add a corresponding batch dimension to `observed_time_series`. observed_time_series = pad_batch_dimension_for_multiple_chains( observed_time_series, model, chain_batch_shape=chain_batch_shape) # When the initial step size depends on a variational optimization, we # can't initialize step size variables before the optimization runs. # Instead we initialize with a dummy value of the appropriate # shape, then wrap the HMC chain with `control_dependencies` to ensure the # variational step sizes are assigned before HMC actually runs. 
step_size = [tf.get_variable( initializer=tf.zeros_like(sample_uniform_initial_state( param, init_sample_shape=chain_batch_shape, return_constrained=False)), name='{}_step_size'.format(param.name), trainable=False, use_resource=True) for (param, ss) in zip(model.parameters, initial_step_size)] step_size_init_op = tf.group( [tf.assign(ss, initial_ss) for (ss, initial_ss) in zip(step_size, initial_step_size)]) # Run HMC to sample from the posterior on parameters. with tf.control_dependencies([step_size_init_op]): samples, kernel_results = mcmc.sample_chain( num_results=num_results, current_state=initial_state, num_burnin_steps=num_warmup_steps, kernel=mcmc.TransformedTransitionKernel( inner_kernel=mcmc.HamiltonianMonteCarlo( target_log_prob_fn=model.joint_log_prob(observed_time_series), step_size=step_size, num_leapfrog_steps=num_leapfrog_steps, step_size_update_fn=mcmc.make_simple_step_size_update_policy( num_adaptation_steps=int(num_warmup_steps * 0.8), decrement_multiplier=0.1, increment_multiplier=0.1), state_gradients_are_stopped=True, seed=seed()), bijector=[param.bijector for param in model.parameters]), parallel_iterations=1 if seed is not None else 10) return samples, kernel_results
def EM_with_MCMC(self, num_warmup_iters, em_iters, mcmc_samples,
                 num_leapfrog_steps, initial_state=None, learning_rate=0.01,
                 display_rate=200):
  Wmix = tf.Variable(self.Wmix, name='Wmix_cur')
  unc_noise_init = tf.math.log(tf.exp(self.noise) - 1)
  unc_noise = tf.Variable(unc_noise_init, name='unc_noise')

  # Setting up the step size and targeted acceptance rate for the MCMC part.
  step_size = tf.Variable(0.01, name='step_size')
  target_accept_rate = 0.651

  if initial_state is None:
    beta_init = 1.2 * tf.ones([self.n_latent, self.dim_input], dtype=tf.float32)
    varm_init = 0.8 * tf.ones([self.n_tasks, self.n_latent], dtype=tf.float32)
    loc_init = tf.zeros(self.n_tasks)
    varc_init = 1.0
  else:
    beta_init, varm_init, loc_init, varc_init = initial_state

  beta_cur = tf.Variable(beta_init, name='beta_cur', trainable=False)
  varm_cur = tf.Variable(varm_init, name='varm_cur', trainable=False)
  loc_cur = tf.Variable(loc_init, name='loc_cur', trainable=False)
  varc_cur = tf.Variable(varc_init, name='varc_cur', trainable=False)

  unconstraining_bijectors = [
      tfb.Softplus(), tfb.Softplus(), tfb.Identity(), tfb.Softplus()
  ]

  unnormalized_posterior_log_prob = lambda *args: self.joint_log_prob(
      tf.nn.softplus(unc_noise), Wmix, *args)

  current_state = [beta_cur, varm_cur, loc_cur, varc_cur]

  # Initializing a sampler for the warm-up phase.
  sampler = TransformedTransitionKernel(
      inner_kernel=HamiltonianMonteCarlo(
          target_log_prob_fn=unnormalized_posterior_log_prob,
          step_size=step_size,
          num_leapfrog_steps=num_leapfrog_steps),
      bijector=unconstraining_bijectors)

  # One step of the sampler.
  [beta_next, varm_next, loc_next, varc_next], kernel_results = sampler.one_step(
      current_state=current_state,
      previous_kernel_results=sampler.bootstrap_results(current_state))

  # Updating the step size.
  step_size_update = step_size_simple_update(
      step_size,
      kernel_results,
      target_rate=target_accept_rate,
      decrement_multiplier=0.1,
      increment_multiplier=0.1)

  # Updating the state of the hyperparameters.
  beta_update1 = beta_cur.assign(beta_next)
  varm_update1 = varm_cur.assign(varm_next)
  loc_update1 = loc_cur.assign(loc_next)
  varc_update1 = varc_cur.assign(varc_next)

  warmup_update = tf.group([
      beta_update1, varm_update1, loc_update1, varc_update1, step_size_update
  ])
  step_size_update2 = step_size.assign(0.95 * step_size)
  simple_update = tf.group(
      [beta_update1, varm_update1, loc_update1, varc_update1])

  # Set up the E-step with MCMC.
  [beta_probs, varm_probs, loc_probs, varc_probs], em_kernel_results = sample_chain(
      num_results=10,
      num_burnin_steps=0,
      current_state=current_state,
      kernel=TransformedTransitionKernel(
          inner_kernel=HamiltonianMonteCarlo(
              target_log_prob_fn=unnormalized_posterior_log_prob,
              step_size=0.95 * step_size,
              num_leapfrog_steps=num_leapfrog_steps),
          bijector=unconstraining_bijectors))

  # Updating the state of the hyperparameters.
  beta_update2 = beta_cur.assign(tf.reduce_mean(beta_probs, axis=0))
  varm_update2 = varm_cur.assign(tf.reduce_mean(varm_probs, axis=0))
  loc_update2 = loc_cur.assign(tf.reduce_mean(loc_probs, axis=0))
  varc_update2 = varc_cur.assign(tf.reduce_mean(varc_probs, axis=0))

  expectation_update = tf.group(
      [beta_update2, varm_update2, loc_update2, varc_update2])

  #-- Set up the M-step (updating the noise variance).
  with tf.control_dependencies([expectation_update]):
    loss = (-self.joint_log_prob(tf.nn.softplus(unc_noise), Wmix, beta_cur,
                                 varm_cur, loc_cur, varc_cur)
            - self.rv_noise.log_prob(tf.nn.softplus(unc_noise))
            - self.rv_Wmix.log_prob(Wmix))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    minimization_update = optimizer.minimize(loss)

  init = tf.global_variables_initializer()
  sess = tf.Session()
  sess.run(init)

  # Initial warm-up stage.
  print('First warm-up phase.')
  num_accepted = 0
  for t in range(num_warmup_iters):
    _, is_accepted_val = sess.run(
        [warmup_update, kernel_results.inner_results.is_accepted])
    num_accepted += is_accepted_val
    if (t % display_rate == 0) or (t == num_warmup_iters - 1):
      print("Warm-Up Iteration: {:>3} Acceptance Rate: {:.3f}".format(
          t, num_accepted / (t + 1)))

  loss_history = np.zeros(em_iters)
  noise_history = np.zeros((em_iters, self.n_tasks))

  print('Estimating the noise variance: ')
  for t in range(em_iters):
    [_, _, unc_noise_, Wmix_, loss_] = sess.run(
        [expectation_update, minimization_update, unc_noise, Wmix, loss])
    loss_history[t] = loss_
    noise_history[t, :] = np.log(np.exp(unc_noise_) + 1)
    if (t % display_rate == 0) or (t == em_iters - 1):
      print("Iteration: {:>4} Loss: {:.3f}".format(t, loss_))

  # Second warm-up phase.
  print('Second warm-up phase.')
  num_accepted = 0
  for t in range(num_warmup_iters):
    _, is_accepted_val = sess.run(
        [warmup_update, kernel_results.inner_results.is_accepted])
    num_accepted += is_accepted_val
    if (t % display_rate == 0) or (t == num_warmup_iters - 1):
      print("Warm-Up Iteration: {:>3} Acceptance Rate: {:.3f}".format(
          t, num_accepted / (t + 1)))

  step_size_ = sess.run(step_size)
  if step_size_ < 1e-4:
    warnings.warn("Estimated step size is low. (less than 1e-4)")

  print('Collecting samples for the GP hyperparameters.')
  sess.run(step_size_update2)
  loc_samples = np.zeros((mcmc_samples, self.n_tasks))
  varm_samples = np.zeros((mcmc_samples, self.n_tasks, self.n_latent))
  beta_samples = np.zeros((mcmc_samples, self.n_latent, self.dim_input))
  varc_samples = np.zeros(mcmc_samples)
  num_accepted = 0
  total_runs = 4 * mcmc_samples
  for t in range(total_runs):
    [_, is_accepted_val, loc_next_, varm_next_, beta_next_,
     varc_next_] = sess.run([
         simple_update, kernel_results.inner_results.is_accepted, loc_next,
         varm_next, beta_next, varc_next
     ])
    if t % 4 == 0:
      idx = t // 4
      loc_samples[idx, :] = loc_next_
      varm_samples[idx, :, :] = varm_next_
      beta_samples[idx, :, :] = beta_next_
      varc_samples[idx] = varc_next_
    num_accepted += is_accepted_val
    if (t % display_rate == 0) or (t == total_runs - 1):
      acceptance_rate = num_accepted / (t + 1)
      print("Sampling Iteration: {:>3} Acceptance Rate: {:.3f}".format(
          t, acceptance_rate))

  self.noise = np.log(np.exp(unc_noise_) + 1)
  self.noise = tf.convert_to_tensor(self.noise, tf.float32)
  self.Wmix = tf.convert_to_tensor(Wmix_, tf.float32)
  hyperpar_samples = [loc_samples, varm_samples, beta_samples, varc_samples]
  if acceptance_rate < 0.1:
    warnings.warn("Acceptance rate was low (less than 0.1)")
  sess.close()

  return hyperpar_samples, loss_history, noise_history
def mcmc(self, mcmc_samples, num_burnin_steps, step_size, initial_state=None,
         prev_kernel_results=None, num_leapfrog_steps=3):
  # Performs the sampling for the posterior distributions of the hyperparameters.
  noise = self.noise
  Wmix = self.Wmix
  unnormalized_posterior_log_prob = lambda *args: self.joint_log_prob(
      noise, Wmix, *args)

  if initial_state is None:
    print('generating initial state')
    beta_init = 1.2 * tf.ones([self.n_latent, self.dim_input], dtype=tf.float32)
    varm_init = 0.8 * tf.ones([self.n_tasks, self.n_latent], dtype=tf.float32)
    loc_init = tf.zeros(self.n_tasks)
    varc_init = 1.0
    initial_state = [beta_init, varm_init, loc_init, varc_init]

  #------- Unconstrained representation ---------
  unconstraining_bijectors = [
      tfb.Softplus(), tfb.Softplus(), tfb.Identity(), tfb.Softplus()
  ]

  #---- Setting up the mcmc sampler
  [beta_probs, varm_probs, loc_probs, varc_probs], kernel_results = sample_chain(
      num_results=mcmc_samples,
      num_burnin_steps=num_burnin_steps,
      num_steps_between_results=4,
      current_state=initial_state,
      previous_kernel_results=prev_kernel_results,
      kernel=TransformedTransitionKernel(
          inner_kernel=HamiltonianMonteCarlo(
              target_log_prob_fn=unnormalized_posterior_log_prob,
              step_size=step_size,
              num_leapfrog_steps=num_leapfrog_steps),
          bijector=unconstraining_bijectors))

  acceptance_rate = tf.reduce_mean(
      tf.to_float(kernel_results.inner_results.is_accepted))

  with tf.Session() as sess:
    [acceptance_rate_, loc_probs_, varm_probs_, beta_probs_,
     varc_probs_] = sess.run(
         [acceptance_rate, loc_probs, varm_probs, beta_probs, varc_probs])

  print('acceptance_rate:', acceptance_rate_)
  hyperpar_samples = [loc_probs_, varm_probs_, beta_probs_, varc_probs_]
  return hyperpar_samples, acceptance_rate_
def do_sampling():
  return mcmc.sample_chain(
      num_results=FLAGS.num_samples,
      num_burnin_steps=FLAGS.num_burnin_steps,
      current_state=initial_states,
      kernel=kernel,
      num_steps_between_results=1)
def EM_with_MCMC(self, num_warmup_iters, em_iters, mcmc_samples,
                 num_leapfrog_steps, initial_state=None, learning_rate=0.01,
                 display_rate=200):
  # Estimates a value for the noise variance and obtains samples from the
  # posterior distribution of the hyperparameters.
  # Inputs:
  #   initial_state := list of tensors providing the initial state for the mcmc sampler
  #   num_warmup_iters := number of iterations for the warm-up phase
  #   em_iters := number of iterations for the EM phase
  #   mcmc_samples := number of samples to collect for the hyperparameters
  #   num_leapfrog_steps := number of leapfrog steps for the HMC sampler
  #   learning_rate := learning rate for the optimizer used in the M-step
  #   display_rate := rate at which information is printed
  # Outputs:
  #   hyperpar_samples := list [loc_samples, varm_samples, beta_samples] of samples
  #                       for the posterior distribution of the hyperparameters
  #   loss_history := array containing values of the loss function of the M-step
  #   noise_history := array containing values of the noise variance computed in the M-step

  # Defining an unconstrained version of the noise level.
  unc_noise_init = tf.log(tf.exp(self.noise) - 1)
  unc_noise = tf.Variable(unc_noise_init, name='unc_noise')

  # Setting up the step size and targeted acceptance rate for the MCMC part.
  step_size = tf.Variable(0.01, name='step_size')
  target_accept_rate = 0.651

  if initial_state is None:
    beta = 1.2 * tf.ones(self.dim_input, tf.float32)
    varm = 0.8
    loc = 0.0
  else:
    beta, varm, loc = initial_state

  beta_cur = tf.Variable(beta, name='beta_cur', trainable=False)
  varm_cur = tf.Variable(varm, name='varm_cur', trainable=False)
  loc_cur = tf.Variable(loc, name='loc_cur', trainable=False)

  unconstraining_bijectors = [tfb.Softplus(), tfb.Softplus(), tfb.Identity()]

  unnormalized_posterior_log_prob = functools.partial(
      self.joint_log_prob, tf.nn.softplus(unc_noise))

  current_state = [beta_cur, varm_cur, loc_cur]

  # Initializing a sampler for the warm-up phase.
  sampler = TransformedTransitionKernel(
      inner_kernel=HamiltonianMonteCarlo(
          target_log_prob_fn=unnormalized_posterior_log_prob,
          step_size=step_size,
          num_leapfrog_steps=num_leapfrog_steps),
      bijector=unconstraining_bijectors)

  # One step of the sampler.
  [beta_next, varm_next, loc_next], kernel_results = sampler.one_step(
      current_state=current_state,
      previous_kernel_results=sampler.bootstrap_results(current_state))

  # Updating the step size.
  step_size_update = step_size_simple_update(
      step_size,
      kernel_results,
      target_rate=target_accept_rate,
      decrement_multiplier=0.1,
      increment_multiplier=0.1)

  # Updating the state of the hyperparameters.
  beta_update1 = beta_cur.assign(beta_next)
  varm_update1 = varm_cur.assign(varm_next)
  loc_update1 = loc_cur.assign(loc_next)

  warmup_update = tf.group(
      [beta_update1, varm_update1, loc_update1, step_size_update])
  step_size_update2 = step_size.assign(0.95 * step_size)
  simple_update = tf.group([beta_update1, varm_update1, loc_update1])

  # Set up the E-step with MCMC.
  [beta_probs, varm_probs, loc_probs], em_kernel_results = sample_chain(
      num_results=10,
      num_burnin_steps=0,
      current_state=current_state,
      kernel=TransformedTransitionKernel(
          inner_kernel=HamiltonianMonteCarlo(
              target_log_prob_fn=unnormalized_posterior_log_prob,
              step_size=0.95 * step_size,
              num_leapfrog_steps=num_leapfrog_steps),
          bijector=unconstraining_bijectors))

  # Updating the state of the hyperparameters.
  beta_update2 = beta_cur.assign(tf.reduce_mean(beta_probs, axis=0))
  varm_update2 = varm_cur.assign(tf.reduce_mean(varm_probs, axis=0))
  loc_update2 = loc_cur.assign(tf.reduce_mean(loc_probs, axis=0))

  expectation_update = tf.group([beta_update2, varm_update2, loc_update2])

  #-- Set up the M-step (updating the noise variance).
  with tf.control_dependencies([expectation_update]):
    loss = (-self.joint_log_prob(tf.nn.softplus(unc_noise), beta_cur, varm_cur,
                                 loc_cur)
            - self.rv_noise.log_prob(tf.nn.softplus(unc_noise)))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    minimization_update = optimizer.minimize(loss)

  init = tf.global_variables_initializer()
  sess = tf.Session()
  sess.run(init)

  # Initial warm-up stage.
  print('First warm-up phase.')
  num_accepted = 0
  for t in range(num_warmup_iters):
    _, is_accepted_val = sess.run(
        [warmup_update, kernel_results.inner_results.is_accepted])
    num_accepted += is_accepted_val
    if (t % display_rate == 0) or (t == num_warmup_iters - 1):
      print("Warm-Up Iteration: {:>3} Acceptance Rate: {:.3f}".format(
          t, num_accepted / (t + 1)))

  loss_history = np.zeros([em_iters])
  noise_history = np.zeros([em_iters])

  print('Estimating the noise variance: ')
  for t in range(em_iters):
    [_, _, unc_noise_, loss_] = sess.run(
        [expectation_update, minimization_update, unc_noise, loss])
    loss_history[t] = loss_
    noise_history[t] = np.log(np.exp(unc_noise_) + 1)
    if (t % display_rate == 0) or (t == em_iters - 1):
      print("Iteration: {:>4} Loss: {:.3f}".format(t, loss_))

  # Second warm-up phase.
  print('Second warm-up phase.')
  num_accepted = 0
  for t in range(num_warmup_iters):
    _, is_accepted_val = sess.run(
        [warmup_update, kernel_results.inner_results.is_accepted])
    num_accepted += is_accepted_val
    if (t % display_rate == 0) or (t == num_warmup_iters - 1):
      print("Warm-Up Iteration: {:>3} Acceptance Rate: {:.3f}".format(
          t, num_accepted / (t + 1)))

  step_size_ = sess.run(step_size)
  if step_size_ < 1e-4:
    warnings.warn("Estimated step size is low. (less than 1e-4)")

  print('Collecting samples for the GP hyperparameters.')
  sess.run(step_size_update2)
  loc_samples = np.zeros(mcmc_samples)
  varm_samples = np.zeros(mcmc_samples)
  beta_samples = np.zeros((mcmc_samples, self.dim_input))
  num_accepted = 0
  for t in range(mcmc_samples):
    [_, is_accepted_val, loc_next_, varm_next_, beta_next_] = sess.run([
        simple_update, kernel_results.inner_results.is_accepted, loc_next,
        varm_next, beta_next
    ])
    loc_samples[t] = loc_next_
    varm_samples[t] = varm_next_
    beta_samples[t, :] = beta_next_
    num_accepted += is_accepted_val
    if (t % display_rate == 0) or (t == mcmc_samples - 1):
      acceptance_rate = num_accepted / (t + 1)
      print("Sampling Iteration: {:>3} Acceptance Rate: {:.3f}".format(
          t, acceptance_rate))

  self.noise = math.log(math.exp(unc_noise_) + 1)
  hyperpar_samples = [loc_samples, varm_samples, beta_samples]
  if acceptance_rate < 0.1:
    warnings.warn("Acceptance rate was low (less than 0.1)")
  sess.close()

  return hyperpar_samples, loss_history, noise_history
def hmc_interleaved(model_config, target_cp, target_ncp, num_leapfrog_steps_cp,
                    num_leapfrog_steps_ncp, step_size_cp, step_size_ncp,
                    initial_states_cp):
  model_cp = model_config.model

  initial_states = list(initial_states_cp)  # Variational samples.
  shapes = [s[0].shape for s in initial_states]

  cp_step_sizes = [
      np.array(
          np.ones(shape=np.concatenate([[FLAGS.num_chains],
                                        shapes[i]]).astype(int)) *
          step_size_cp[i],
          dtype=np.float32) / np.float32((num_leapfrog_steps_cp / 4.)**2)
      for i in range(len(step_size_cp))
  ]

  ncp_step_sizes = [
      np.array(
          np.ones(shape=np.concatenate([[FLAGS.num_chains],
                                        shapes[i]]).astype(int)) *
          step_size_ncp[i],
          dtype=np.float32) / np.float32((num_leapfrog_steps_ncp / 4.)**2)
      for i in range(len(step_size_ncp))
  ]

  vectorized_target_cp = vectorize_log_joint_fn(target_cp)
  vectorized_target_ncp = vectorize_log_joint_fn(target_ncp)

  inner_kernel_cp = mcmc.SimpleStepSizeAdaptation(
      inner_kernel=mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=vectorized_target_cp,
          step_size=cp_step_sizes,
          num_leapfrog_steps=num_leapfrog_steps_cp),
      adaptation_rate=0.05,
      target_accept_prob=0.75,
      num_adaptation_steps=FLAGS.num_adaptation_steps)

  inner_kernel_ncp = mcmc.SimpleStepSizeAdaptation(
      inner_kernel=mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=vectorized_target_ncp,
          step_size=ncp_step_sizes,
          num_leapfrog_steps=num_leapfrog_steps_ncp),
      adaptation_rate=0.05,
      target_accept_prob=0.75,
      num_adaptation_steps=FLAGS.num_adaptation_steps)

  to_centered = model_config.to_centered
  to_noncentered = model_config.to_noncentered

  kernel = interleaved.Interleaved(inner_kernel_cp, inner_kernel_ncp,
                                   vectorise_transform(to_centered),
                                   vectorise_transform(to_noncentered))

  states, kernel_results = mcmc.sample_chain(
      num_results=FLAGS.num_samples,
      num_burnin_steps=FLAGS.num_burnin_steps,
      current_state=initial_states,
      kernel=kernel,
      num_steps_between_results=1)

  ess = tfp.mcmc.effective_sample_size(states)

  return states, kernel_results, ess
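# Illustrative follow-up under assumed names: given the `states` list returned by
# `hmc_interleaved` above, the companion R-hat convergence diagnostic complements
# the effective sample size it already computes. `sample_chain` here writes draws
# with shape [num_samples, num_chains, ...], so the default
# `independent_chain_ndims=1` applies.
rhat_per_param = tfp.mcmc.potential_scale_reduction(
    states, independent_chain_ndims=1)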