def initialise_adaptor(theta, L, grad_L, num_adaption_steps, delta, sigma0,
                       use_dense_mass_matrix):
    """
    Creates a generator that terminates by returning an instance of
    ``pints.DualAveragingAdaption``.

    Initialising the adaptor requires a 'reasonable' initial step size
    epsilon, which is obtained by delegating to the
    ``find_reasonable_epsilon`` generator. Everything that generator yields is
    passed straight through to the caller (according to the original
    documentation these are the current positions of the leapfrog
    integrator), and whatever the caller sends back is forwarded to it.

    Arguments
    ---------
    theta: ndarray
        current position; only its length is used here, the value itself is
        forwarded to ``find_reasonable_epsilon``
    L, grad_L:
        values forwarded unchanged to ``find_reasonable_epsilon``
    num_adaption_steps: int
        number of adaption steps passed to ``pints.DualAveragingAdaption``
    delta: float
        target acceptance probability passed to
        ``pints.DualAveragingAdaption``
    sigma0:
        accepted for interface compatibility but NOT used: the initial
        inverse mass matrix is always ``1e-3`` times the identity
    use_dense_mass_matrix: bool
        if True the initial inverse mass matrix is a 2-d array
        (``1e-3 * np.eye``), otherwise a 1-d array holding the diagonal
        (``1e-3 * np.ones``)

    Returns (via ``StopIteration``)
    -------------------------------
    pints.DualAveragingAdaption
        adaptor constructed from ``num_adaption_steps``, ``delta``, the
        epsilon found and the initial inverse mass matrix
    """
    # The initial inverse mass matrix is a small multiple of the identity.
    # Earlier code first assigned ``sigma0`` (or ``np.diag(sigma0)``) and then
    # immediately overwrote it; those dead stores have been removed, so an
    # unusable ``sigma0`` no longer raises here.
    n_parameters = len(theta)
    if use_dense_mass_matrix:
        init_inv_mass_matrix = 1e-3 * np.eye(n_parameters)
    else:
        init_inv_mass_matrix = 1e-3 * np.ones(n_parameters)

    # find a good value to start epsilon at (this will later be refined so
    # that the acceptance probability matches delta)
    epsilon = yield from find_reasonable_epsilon(
        theta, L, grad_L, init_inv_mass_matrix)

    # create adaption for epsilon and mass matrix
    return pints.DualAveragingAdaption(
        num_adaption_steps, delta, epsilon, init_inv_mass_matrix)
def test_use_dense_mass_matrix(self):
    """
    Checks ``use_dense_mass_matrix()``: it must be True for an adaptor built
    with a 2-d inverse mass matrix and False for one built with a 1-d one.
    """
    def make_adaptor(inv_mass_matrix):
        # 200 warm-up steps, target acceptance probability 1.0 and an
        # initial epsilon of 1.0 are shared by both cases
        return pints.DualAveragingAdaption(200, 1.0, 1.0, inv_mass_matrix)

    # 2-d (matrix) input
    dense = make_adaptor(np.array([[1, 0], [0, 1]]))
    self.assertTrue(dense.use_dense_mass_matrix())

    # 1-d (diagonal only) input
    diagonal = make_adaptor(np.array([1, 1]))
    self.assertFalse(diagonal.use_dense_mass_matrix())
def test_set_inv_mass(self):
    """
    Constructing an adaptor with a singular inverse mass matrix must raise
    ``AttributeError`` and emit text containing "WARNING".
    """
    # second diagonal entry is zero, so this matrix cannot be inverted
    singular = np.array([[1, 0], [0, 0]])
    warmup_steps, accept_target, step_size = 200, 1.0, 1.0

    with StreamCapture() as capture:
        with self.assertRaises(AttributeError):
            pints.DualAveragingAdaption(
                warmup_steps, accept_target, step_size, singular)
        self.assertIn("WARNING", capture.text())
def test_accept_prob_of_greater_then_one(self):
    """
    Feeding an acceptance probability above one into ``adapt_epsilon`` must
    leave ``_h_bar`` at zero when the target acceptance probability is 1.0.
    """
    adaptor = pints.DualAveragingAdaption(
        200,                         # number of warm-up steps
        1.0,                         # target acceptance probability
        1.0,                         # initial epsilon
        np.array([[1, 0], [0, 1]]),  # initial inverse mass matrix
    )

    # the input 2.0 should be adjusted down to 1.0, which equals the target
    # acceptance probability
    adaptor.adapt_epsilon(2.0)
    self.assertEqual(adaptor._h_bar, 0.0)
def nuts_sampler(x0, delta, num_adaption_steps, sigma0,
                 hamiltonian_threshold, max_tree_depth,
                 use_dense_mass_matrix):
    """
    The dual averaging NUTS mcmc sampler given in Algorithm 6 of [1]_.

    Implements the multinomial sampling suggested in [2]_. Implements a mass
    matrix for the dynamics, which is detailed in [2]_. Both the step size and
    the mass matrix are adapted using a combination of the dual averaging
    detailed in [1]_ and the windowed adaption for the mass matrix and step
    size implemented in the Stan library (https://github.com/stan-dev/stan).

    Implemented as a coroutine that continually generates new theta values to
    evaluate (L, L') at. Users must send (L, L') back to the coroutine to
    continue execution. The very first value generated is ``x0`` itself.

    The end of an mcmc step is signalled by generating a tuple (rather than
    an ndarray) with six entries::

        (theta, L, grad_L, state.alpha / state.n_alpha, state.n_alpha,
         state.divergent)

    i.e. the new position, its log-pdf and gradient, the ratio
    ``alpha / n_alpha`` that is also fed to the adaptor as the acceptance
    statistic, ``n_alpha`` itself, and the divergence flag of the step.

    Arguments
    ---------
    x0: ndarray
        starting point
    delta: float
        target acceptance probability (Dual Averaging scheme)
    num_adaption_steps: int
        number of adaption steps (Dual Averaging scheme)
    sigma0:
        accepted for interface compatibility but NOT used: the initial
        inverse mass matrix is always ``1e-3`` times the identity
    hamiltonian_threshold: float
        threshold to test divergent iterations
    max_tree_depth: int
        maximum tree depth
    use_dense_mass_matrix: bool
        if False, use a diagonal mass matrix, if True use a fully dense mass
        matrix

    Raises
    ------
    ValueError
        if the log-pdf sent back for ``x0`` is not finite

    References
    ----------
    .. [1] Hoffman, M. D., & Gelman, A. (2014). The No-U-Turn sampler:
           adaptively setting path lengths in Hamiltonian Monte Carlo.
           Journal of Machine Learning Research, 15(1), 1593-1623.

    .. [2] Betancourt, M. (2018). `A Conceptual Introduction to Hamiltonian
           Monte Carlo`, https://arxiv.org/abs/1701.02434.
    """
    # Initialise sampler with x0 and calculate logpdf
    theta = x0
    L, grad_L = (yield theta)

    # Check first point is somewhere sensible
    if not np.isfinite(L):
        raise ValueError('Initial point for MCMC must have finite logpdf.')

    # The initial inverse mass matrix is a small multiple of the identity
    # (a 1-d array of diagonal entries when not using a dense mass matrix).
    # Earlier code first assigned ``sigma0`` / ``np.diag(sigma0)`` and then
    # immediately overwrote it; those dead stores have been removed.
    if use_dense_mass_matrix:
        init_inv_mass_matrix = 1e-3 * np.eye(len(x0))
    else:
        init_inv_mass_matrix = 1e-3 * np.ones(len(x0))

    # find a good value to start epsilon at (this will later be refined so
    # that the acceptance probability matches delta)
    epsilon = yield from find_reasonable_epsilon(
        theta, L, grad_L, init_inv_mass_matrix)

    # create adaption for epsilon and mass matrix
    adaptor = pints.DualAveragingAdaption(
        num_adaption_steps, delta, epsilon, init_inv_mass_matrix)

    # provide an infinite generator of mcmc steps....
    while True:
        # randomly sample momentum
        if use_dense_mass_matrix:
            r0 = np.random.multivariate_normal(
                np.zeros(len(theta)), adaptor.get_mass_matrix())
        else:
            # diagonal case: the mass "matrix" holds per-dimension variances
            r0 = np.random.normal(
                np.zeros(len(theta)), np.sqrt(adaptor.get_mass_matrix()))
        hamiltonian0 = L - kinetic_energy(r0, adaptor.get_inv_mass_matrix())

        # create initial integration path state
        state = NutsState(theta=theta, r=r0, L=L, grad_L=grad_L,
                          n=0.0, s=1, alpha=1, n_alpha=1, divergent=False,
                          inv_mass_matrix=adaptor.get_inv_mass_matrix())
        j = 0

        # build up an integration path with 2^j points, stopping when we
        # either encounter a U-Turn, or reach a max number of points
        # 2^max_tree_depth
        while j < max_tree_depth and state.s == 1:

            # pick a random direction to integrate in
            # (to maintain detailed balance)
            vj = 1 if np.random.randint(0, 2) else -1

            # recursively build up tree in that direction
            state_dash = yield from build_tree(
                state, vj, j, adaptor, hamiltonian0, hamiltonian_threshold)

            state.update(state_dash, direction=vj, root=True)

            j += 1

        # update current position in chain
        theta = state.theta
        L = state.L
        grad_L = state.grad_L

        # acceptance statistic of this step; used for adaption and reported
        # back to the caller
        accept_stat = state.alpha / state.n_alpha

        # adapt epsilon and mass matrix using dual averaging
        restart_stepsize_adapt = adaptor.step(state.theta, accept_stat)
        if restart_stepsize_adapt:
            # the adaptor asked for a restart of the step size adaption, so
            # look for a fresh starting epsilon using the current inverse
            # mass matrix
            epsilon = yield from find_reasonable_epsilon(
                theta, L, grad_L, adaptor.get_inv_mass_matrix())
            adaptor.init_adapt_epsilon(epsilon)

        # signal calling process that mcmc step is complete by passing a
        # tuple (rather than an ndarray)
        yield (theta,
               L,
               grad_L,
               accept_stat,
               state.n_alpha,
               state.divergent)
def test_dual_averaging(self):
    """
    Drives a ``pints.DualAveragingAdaption`` through a complete warm-up
    schedule of 200 steps and checks, in order: the constructor's rejection
    of too few warm-up steps, the initial state, that the inverse mass
    matrix is untouched until the first adaption window closes, the updated
    (inverse) mass matrix and epsilon after that window, the window counters
    for the remaining windows, and that stepping after warm-up leaves the
    counter unchanged.

    The test reads private attributes of the adaptor (``_epsilon``,
    ``_counter``, ``_next_window``, ``_adapting``, ``_initial_window``,
    ``_samples``) and draws samples from ``np.random``; the order of the
    statements matters.
    """
    num_warmup_steps = 200
    target_accept_prob = 0.5
    init_epsilon = 1.0
    init_inv_mass_matrix = np.array([[1, 0], [0, 1]])
    # covariance of the samples fed to the adaptor
    target_mass_matrix = np.array([[10, 0], [0, 10]])

    # raises an exception if the requested number of warm-up steps is
    # too low
    with self.assertRaises(ValueError):
        averager = pints.DualAveragingAdaption(10, target_accept_prob,
                                               init_epsilon,
                                               init_inv_mass_matrix)

    averager = pints.DualAveragingAdaption(num_warmup_steps,
                                           target_accept_prob,
                                           init_epsilon,
                                           init_inv_mass_matrix)

    # test initialisation
    self.assertEqual(averager._epsilon, init_epsilon)
    np.testing.assert_array_equal(averager.get_inv_mass_matrix(),
                                  init_inv_mass_matrix)
    self.assertEqual(averager._counter, 0)

    # these are the default window sizes for the algorithm
    initial_window = 75
    base_window = 25
    terminal_window = 50

    self.assertEqual(averager._next_window, initial_window + base_window)
    self.assertEqual(averager._adapting, True)

    # dummy function to generate acceptance probabilities
    # dual averaging will attempt to set epsilon so this function
    # returns `target_accept_prob`
    def fake_accept_prob(epsilon):
        return 1.0 / (10.0 * epsilon)

    # local record of the samples drawn once the initial window has passed,
    # compared against the adaptor's own ``_samples`` below
    stored_x = np.empty((2, base_window))
    # step up to, but not including, the last step of the first window
    for i in range(averager._next_window - 1):
        x = np.random.multivariate_normal(
            np.zeros(2) + 123, target_mass_matrix)
        restart = averager.step(x, fake_accept_prob(averager._epsilon))
        self.assertFalse(restart)
        if i >= averager._initial_window:
            stored_x[:, i - averager._initial_window] = x

    # before the end of the window the mass matrix should not have been
    # updated
    np.testing.assert_array_equal(averager.get_inv_mass_matrix(),
                                  init_inv_mass_matrix)

    x = np.random.multivariate_normal(
        np.zeros(2) + 123, target_mass_matrix)
    # all but the last column should already match what the adaptor stored
    np.testing.assert_array_equal(averager._samples[:, :-1],
                                  stored_x[:, :-1])
    restart = averager.step(x, fake_accept_prob(averager._epsilon))

    # end of window triggers a restart
    self.assertTrue(restart)
    stored_x[:, -1] = x
    cov = np.cov(stored_x)
    n = base_window
    p = 2
    # expected inverse mass matrix: the sample covariance weighted by
    # n / (n + 5) plus 1e-3 * identity weighted by 5 / (n + 5)
    adapted_cov = (n / (n + 5.0)) * cov + \
        1e-3 * (5.0 / (n + 5.0)) * np.eye(p)
    np.testing.assert_array_equal(averager.get_inv_mass_matrix(),
                                  adapted_cov)
    np.testing.assert_array_equal(averager.get_mass_matrix(),
                                  np.linalg.inv(adapted_cov))

    # test that we have adapted epsilon correctly
    self.assertAlmostEqual(fake_accept_prob(averager._epsilon),
                           target_accept_prob, 1)

    # test the counters
    self.assertEqual(averager._counter, initial_window + base_window)
    self.assertEqual(averager._next_window,
                     num_warmup_steps - terminal_window)

    # test counters for two more windows
    for i in range(averager._next_window - averager._counter):
        x = np.random.multivariate_normal(
            np.zeros(2) + 123, target_mass_matrix)
        averager.step(x, fake_accept_prob(averager._epsilon))
    self.assertEqual(averager._counter,
                     num_warmup_steps - terminal_window)
    self.assertEqual(averager._next_window, num_warmup_steps)
    for i in range(averager._next_window - averager._counter):
        x = np.random.multivariate_normal(
            np.zeros(2) + 123, target_mass_matrix)
        averager.step(x, fake_accept_prob(averager._epsilon))
    self.assertEqual(averager._counter, num_warmup_steps)
    self.assertEqual(averager._adapting, False)

    # check that subsequent steps do nothing
    old_counter = averager._counter
    averager.step(x, fake_accept_prob(averager._epsilon))
    self.assertEqual(old_counter, averager._counter)