def max_likelihood(self, data, weights=None, stats=None, lmbda=0.1):
    """
    As an alternative to MCMC with Polya-gamma augmentation, we also
    implement maximum likelihood learning via gradient-based optimization
    with autograd. This follows the pybasicbayes convention.

    :param data: list of tuples, (x, y), for each dataset.
    :param weights: Not used in this implementation.
    :param stats: Not used in this implementation.
    """
    import autograd.numpy as anp
    from autograd import value_and_grad, hessian_vector_product
    from scipy.optimize import minimize

    assert weights is None
    assert stats is None

    if not isinstance(data, list):
        assert isinstance(data, tuple) and len(data) == 2
        data = [data]

    # Define a helper function for the log of the logistic fn
    def loglogistic(psi):
        return psi - anp.log(1 + anp.exp(psi))

    # Optimize each row of A and b
    for n in range(self.D_out):
        # Define an objective function for the n-th row of hstack((A, b)).
        # This is the negative log likelihood of the n-th column of data.
        def nll(abn):
            an, bn = abn[:-1], abn[-1]
            T = 0
            ll = 0
            for (x, y) in data:
                T += x.shape[0]
                yn = y[:, n]
                psi = anp.dot(x, an) + bn
                ll += anp.sum(yn * loglogistic(psi))
                ll += anp.sum((1 - yn) * loglogistic(-1. * psi))

            # Include a penalty on the weights
            ll -= lmbda * T * anp.sum(an ** 2)
            ll -= lmbda * T * bn ** 2

            return -1 * ll / T

        abn0 = np.concatenate((self.A[n], self.b[n]))
        res = minimize(value_and_grad(nll), abn0,
                       tol=1e-3,
                       method="Newton-CG",
                       jac=True,
                       hessp=hessian_vector_product(nll))

        assert res.success
        self.A[n] = res.x[:-1]
        self.b[n] = res.x[-1]

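
# --- Illustrative sketch (not part of the class above) ---
# A minimal, standalone example of the same optimization pattern: scipy's
# Newton-CG driven by autograd's value_and_grad for the objective/gradient and
# hessian_vector_product for the hessp callback. The toy data, dimensions, and
# penalty weight below are assumptions for illustration only.
import autograd.numpy as anp
import numpy as np
from autograd import value_and_grad, hessian_vector_product
from scipy.optimize import minimize

def loglogistic(psi):
    # log of the logistic function, as in the helper above
    return psi - anp.log(1 + anp.exp(psi))

rng = np.random.RandomState(0)
x, y = rng.randn(500, 3), (rng.rand(500) < 0.5).astype(float)

def nll(w):
    # penalized negative log likelihood of a toy logistic regression
    psi = anp.dot(x, w)
    ll = anp.sum(y * loglogistic(psi) + (1 - y) * loglogistic(-psi))
    return -(ll - 0.1 * x.shape[0] * anp.sum(w ** 2)) / x.shape[0]

res = minimize(value_and_grad(nll), np.zeros(3), jac=True,
               method="Newton-CG", hessp=hessian_vector_product(nll))
print(res.x, res.success)
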
def test_hessian_tensor_product():
    fun = lambda a: np.sum(np.sin(a))
    a = npr.randn(5, 4, 3)
    V = npr.randn(5, 4, 3)
    H = hessian(fun)(a)
    check_equivalent(np.tensordot(H, V, axes=np.ndim(V)),
                     hessian_vector_product(fun)(a, V))

def test_hessian_matrix_product():
    fun = lambda a: np.sum(np.sin(a))
    a = npr.randn(5, 4)
    V = npr.randn(5, 4)
    H = hessian(fun)(a)
    check_equivalent(np.tensordot(H, V),
                     hessian_vector_product(fun)(a, V))

def test_hessian_vector_product():
    fun = lambda a: np.sum(np.sin(a))
    a = npr.randn(5)
    v = npr.randn(5)
    H = hessian(fun)(a)
    check_equivalent(np.dot(H, v),
                     hessian_vector_product(fun)(a, v))

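
# --- Illustrative sketch (assumptions noted in comments) ---
# The three tests above verify that hessian_vector_product(fun)(a, v) equals the
# contraction of the full Hessian with v. The practical point is that the product
# is computed without ever materializing the Hessian, so it scales to inputs where
# hessian(fun)(a) would be far too large to build.
import autograd.numpy as np
from autograd import hessian_vector_product

fun = lambda a: np.sum(np.sin(a))          # Hessian of this fun is diag(-sin(a))
a = np.linspace(0.0, 1.0, 100000)          # a 100000 x 100000 Hessian would be impractical
v = np.ones_like(a)
hv = hessian_vector_product(fun)(a, v)     # O(n) memory, no explicit Hessian
print(np.allclose(hv, -np.sin(a) * v))     # True: matches the analytic product
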
def __init__(self, configurations, parameters, controls, simulation_first_confirmed):
    self.configurations = configurations
    self.parameters = parameters
    self.controls = controls
    self.simulation_first_confirmed = simulation_first_confirmed

    flat_args, self.unflatten = flatten(self.controls)

    # self.obs = obs
    # self.loc = loc
    # if noise_covariance is not None:
    #     self.Gamma_noise_inv = np.linalg.inv(noise_covariance)

    self.gx = grad(self.cost)
    self.J = jacobian(self.forward)
    self.hx = hessian_vector_product(self.cost)
    self.hvp = hvp(self.hx)

    y0, t_total, N_total, number_group, population_proportion, \
        t_control, number_days_per_control_change, number_control_change_times, \
        number_time_dependent_controls = configurations

    self.N_total = N_total
    self.number_group = number_group
    self.t_control = t_control
    self.dimension = len(self.t_control)
    self.number_time_dependent_controls = number_time_dependent_controls

    self.y0 = y0
    self.t_total = t_total
    self.interpolation = piecewiseLinear

    if number_group > 1:
        # contact matrix
        school_closure = True

        # calendar from February 15th
        weekday = [2, 3, 4, 5, 6]
        # calendar from April 1st
        # weekday = [0, 1, 2, 5, 6]
        # calendar from May 1st
        # weekday = [0, 3, 4, 5, 6]

        calendar = np.zeros(1000 + 1, dtype=int)
        # set work days as 1 and school days as 2
        for i in range(1001):
            if np.remainder(i, 7) in weekday:
                calendar[i] = 1
                if not school_closure:  # and i < 45
                    calendar[i] = 2
        self.calendar = calendar

        contact = np.load("utils/contact_matrix.npz")
        self.c_home = contact["home"]
        self.c_school = contact["school"]
        self.c_work = contact["work"]
        self.c_other = contact["other"]
        self.contact_full = self.c_home + 5. / 7 * (
            (1 - school_closure) * self.c_school + self.c_work) + self.c_other

    self.load_data(fips)

def test_optimization_objective(self):
    def objective_fun(x):
        return np.sum(x ** 4)

    x0 = np.random.random(5)

    obj = paragami.OptimizationObjective(
        objective_fun, print_every=0)

    assert_array_almost_equal(objective_fun(x0), obj.f(x0))
    assert_array_almost_equal(
        autograd.grad(objective_fun)(x0), obj.grad(x0))
    assert_array_almost_equal(
        autograd.hessian(objective_fun)(x0), obj.hessian(x0))
    assert_array_almost_equal(
        autograd.hessian_vector_product(objective_fun)(x0, x0),
        obj.hessian_vector_product(x0, x0))

    def test_print_and_log(num_evals, expected_prints, expected_logs):
        with captured_output() as (out, err):
            init_num_iterations = obj.num_iterations()
            for iter in range(num_evals):
                # Function evaluations should be printed and logged.
                obj.f(x0)

                # Derivatives should not count towards printing or logging.
                obj.grad(x0)
                obj.hessian(x0)
                obj.hessian_vector_product(x0, x0)

        lines = out.getvalue().splitlines()
        self.assertEqual(init_num_iterations + num_evals,
                         obj.num_iterations())
        self.assertEqual(len(lines), expected_prints)
        self.assertEqual(len(obj.optimization_log), expected_logs)

    # Test reset.
    obj.set_print_every(1)
    obj.set_log_every(1)
    obj.reset()
    test_print_and_log(num_evals=1, expected_prints=1, expected_logs=1)
    obj.reset()
    test_print_and_log(num_evals=1, expected_prints=1, expected_logs=1)

    # Test that the first iteration prints and logs no matter what.
    obj.set_print_every(2)
    obj.set_log_every(2)
    obj.reset()
    test_print_and_log(num_evals=1, expected_prints=1, expected_logs=1)
    test_print_and_log(num_evals=1, expected_prints=0, expected_logs=1)

    # Test combinations of print and log.
    for print_every, log_every in itertools.product([0, 1], [0, 1]):
        obj.set_print_every(print_every)
        obj.set_log_every(log_every)
        obj.reset()
        test_print_and_log(
            num_evals=3,
            expected_prints=3 * print_every,
            expected_logs=3 * log_every)

    for print_every, log_every in itertools.product([0, 3], [0, 3]):
        obj.set_print_every(print_every)
        obj.set_log_every(log_every)
        obj.reset()
        test_print_and_log(
            num_evals=6,
            expected_prints=(print_every != 0) * 2,
            expected_logs=(log_every != 0) * 2)

    # Test reset only printing or logging.
    obj.set_print_every(2)
    obj.set_log_every(1)

    obj.reset()
    test_print_and_log(num_evals=1, expected_prints=1, expected_logs=1)
    test_print_and_log(num_evals=1, expected_prints=0, expected_logs=2)

    obj.reset()
    test_print_and_log(num_evals=1, expected_prints=1, expected_logs=1)
    obj.reset_iteration_count()
    test_print_and_log(num_evals=1, expected_prints=1, expected_logs=2)

    obj.reset()
    test_print_and_log(num_evals=1, expected_prints=1, expected_logs=1)
    obj.reset_log()
    test_print_and_log(num_evals=1, expected_prints=0, expected_logs=1)

def set_hvp(self, gamma):
    self.hvp = lambda theta, v: \
        autograd.hessian_vector_product(self.f)(theta, v) + np.sum(gamma * theta)

def hvp(x, vec):
    result = hessian_vector_product(loss)(x, vec)
    print('hessian-vector product evaluated at: ({}, {})'.format(x, vec))
    hvp_traj.append((x, vec, result))
    return result

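
# --- Hedged usage sketch ---
# The surrounding code that defines `loss` and `hvp_traj` is not shown above; the
# snippet below assumes a simple quadratic loss purely to show how such a logging
# wrapper is typically plugged into scipy.optimize.minimize as the hessp callback,
# and how the recorded trajectory can be inspected afterwards.
import autograd.numpy as np
from autograd import grad, hessian_vector_product
from scipy.optimize import minimize

loss = lambda x: np.sum((x - 1.0) ** 2)   # assumed stand-in for the real loss
hvp_traj = []                             # records (x, vec, H @ vec) triples

def hvp(x, vec):
    result = hessian_vector_product(loss)(x, vec)
    hvp_traj.append((x, vec, result))
    return result

res = minimize(loss, np.zeros(4), jac=grad(loss), hessp=hvp, method="Newton-CG")
print(res.x, len(hvp_traj))   # solution near 1.0, plus how many HVPs Newton-CG used
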
data = make_pinwheel(radial_std=0.3, tangential_std=0.05, num_classes=3,
                     num_per_class=100, rate=0.4)

def objective(params):
    return -gmm_log_likelihood(params, data)

flattened_obj, unflatten, flattened_init_params = \
    flatten_func(objective, init_params)

fig = plt.figure(figsize=(12, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.show(block=False)

def callback(flattened_params):
    params = unflatten(flattened_params)
    print("Log likelihood {}".format(-objective(params)))
    ax.cla()
    ax.plot(data[:, 0], data[:, 1], 'k.')
    ax.set_xticks([])
    ax.set_yticks([])
    plot_gaussian_mixture(params, ax)
    plt.draw()
    plt.pause(1.0 / 60.0)

minimize(flattened_obj, flattened_init_params,
         jac=grad(flattened_obj),
         hessp=hessian_vector_product(flattened_obj),
         method='Newton-CG', callback=callback)

def test_objective(self):
    model = Model(dim=3)
    objective = obj_lib.Objective(par=model.x, fun=model.f)

    model.set_inits()
    x_free = model.x.get_free()
    x_vec = model.x.get_vector()

    model.set_opt()
    self.assertTrue(objective.fun_free(x_free) > 0.0)
    np_test.assert_array_almost_equal(objective.fun_free(x_free),
                                      objective.fun_vector(x_vec))

    grad = objective.fun_free_grad(x_free)
    hess = objective.fun_free_hessian(x_free)
    np_test.assert_array_almost_equal(np.matmul(hess, grad),
                                      objective.fun_free_hvp(x_free, grad))

    self.assertTrue(objective.fun_vector(x_vec) > 0.0)
    grad = objective.fun_vector_grad(x_vec)
    hess = objective.fun_vector_hessian(x_vec)
    np_test.assert_array_almost_equal(
        np.matmul(hess, grad),
        objective.fun_vector_hvp(x_free, grad))

    # Test Jacobians.
    vec_objective = obj_lib.Objective(par=model.x, fun=model.get_x_vec)
    vec_jac = vec_objective.fun_vector_jacobian(x_vec)
    np_test.assert_array_almost_equal(model.b_mat, vec_jac)

    free_jac = vec_objective.fun_free_jacobian(x_free)
    x_free_to_vec_jac = \
        model.x.free_to_vector_jac(x_free).todense()
    np_test.assert_array_almost_equal(
        np.matmul(model.b_mat, np.transpose(x_free_to_vec_jac)),
        free_jac)

    # Test the preconditioning
    preconditioner = 2.0 * np.eye(model.dim)
    preconditioner[model.dim - 1, 0] = 0.1  # Add asymmetry for testing!
    objective.preconditioner = preconditioner

    np_test.assert_array_almost_equal(
        objective.fun_free_cond(x_free),
        objective.fun_free(np.matmul(preconditioner, x_free)),
        err_msg='Conditioned function values')

    fun_free_cond_grad = autograd.grad(objective.fun_free_cond)
    grad_cond = objective.fun_free_grad_cond(x_free)
    np_test.assert_array_almost_equal(
        fun_free_cond_grad(x_free), grad_cond,
        err_msg='Conditioned gradient values')

    fun_free_cond_hessian = autograd.hessian(objective.fun_free_cond)
    hess_cond = objective.fun_free_hessian_cond(x_free)
    np_test.assert_array_almost_equal(
        fun_free_cond_hessian(x_free), hess_cond,
        err_msg='Conditioned Hessian values')

    fun_free_cond_hvp = autograd.hessian_vector_product(
        objective.fun_free_cond)
    np_test.assert_array_almost_equal(
        fun_free_cond_hvp(x_free, grad_cond),
        objective.fun_free_hvp_cond(x_free, grad_cond),
        err_msg='Conditioned Hessian vector product values')