def get_stick_breaking_entropy(stick_propn_mean, stick_propn_info,
                               gh_loc, gh_weights):
    # Return the entropy of the logit-normal distribution on the sticks,
    # whose logit has mean stick_propn_mean and information stick_propn_info.
    # Integration is done on the real line with respect to Lebesgue measure,
    # numerically with Gauss-Hermite quadrature. gh_loc and gh_weights
    # specify the locations and weights of the quadrature points.
    #
    # We seek the entropy -E[log q(V)], where q is the density of a
    # logit-normal and V ~ logit-normal. Let W := logit(V), so W ~ Normal.
    # We can then decompose log q(V) into the normal log density of W and a
    # Jacobian term. The expectation of the normal term evaluates to the
    # normal entropy; the Jacobian factor is 1/(v(1-v)), so we add
    # E[log V] + E[log(1-V)] to the normal entropy.
    assert np.all(gh_weights > 0)
    assert stick_propn_mean.shape == stick_propn_info.shape
    assert np.all(stick_propn_info > 0)

    e_log_v, e_log_1mv = \
        ef.get_e_log_logitnormal(
            lognorm_means = stick_propn_mean,
            lognorm_infos = stick_propn_info,
            gh_loc = gh_loc,
            gh_weights = gh_weights)

    return np.sum(ef.univariate_normal_entropy(stick_propn_info)) + \
            np.sum(e_log_v + e_log_1mv)
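# A minimal usage sketch for get_stick_breaking_entropy, assuming the
# surrounding module's np and ef imports. The stick parameters and the
# quadrature order below are illustrative assumptions, not values from the
# original code.
def _example_stick_breaking_entropy():
    from numpy.polynomial.hermite import hermgauss
    gh_loc, gh_weights = hermgauss(8)               # quadrature points, weights
    stick_propn_mean = np.array([0.5, -0.2, 1.0])   # logit-space means
    stick_propn_info = np.array([2.0, 1.5, 3.0])    # logit-space informations
    return get_stick_breaking_entropy(
        stick_propn_mean, stick_propn_info, gh_loc, gh_weights)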
def get_e_log_prior(glmm_par, prior_par):
    e_beta = glmm_par['beta']['mean'].get()
    info_beta = glmm_par['beta']['info'].get()
    # info_beta is diagonal, so invert it elementwise rather than with
    # np.linalg.inv.
    cov_beta = np.diag(1. / info_beta)

    e_mu = glmm_par['mu']['mean'].get()
    info_mu = glmm_par['mu']['info'].get()
    var_mu = 1. / info_mu

    e_tau = glmm_par['tau'].e()
    e_log_tau = glmm_par['tau'].e_log()

    e_log_p_beta = ef.mvn_prior(
        prior_mean = prior_par['beta_prior_mean'].get(),
        prior_info = prior_par['beta_prior_info'].get(),
        e_obs = e_beta,
        cov_obs = cov_beta)
    e_log_p_mu = ef.uvn_prior(
        prior_mean = prior_par['mu_prior_mean'].get(),
        prior_info = prior_par['mu_prior_info'].get(),
        e_obs = e_mu,
        var_obs = var_mu)
    e_log_p_tau = ef.gamma_prior(
        prior_shape = prior_par['tau_prior_alpha'].get(),
        prior_rate = prior_par['tau_prior_beta'].get(),
        e_obs = e_tau,
        e_log_obs = e_log_tau)

    return e_log_p_beta + e_log_p_mu + e_log_p_tau
def get_mle_log_prior(mle_par, prior_par):
    # Evaluate the log prior at a point estimate by passing zero
    # (co)variances to the expected-prior functions.
    beta = mle_par['beta'].get()
    mu = mle_par['mu'].get()
    tau = mle_par['tau'].get()
    K = len(beta)

    log_p_beta = ef.mvn_prior(
        prior_mean = prior_par['beta_prior_mean'].get(),
        prior_info = prior_par['beta_prior_info'].get(),
        e_obs = beta,
        cov_obs = np.zeros((K, K)))
    log_p_mu = ef.uvn_prior(
        prior_mean = prior_par['mu_prior_mean'].get(),
        prior_info = prior_par['mu_prior_info'].get(),
        e_obs = mu,
        var_obs = 0.0)
    log_p_tau = ef.gamma_prior(
        prior_shape = prior_par['tau_prior_alpha'].get(),
        prior_rate = prior_par['tau_prior_beta'].get(),
        e_obs = tau,
        e_log_obs = np.log(tau))

    return log_p_beta + log_p_mu + log_p_tau
def test_logitnormal_moments(self):
    # Quadrature points for computing logit-normal moments.
    gh_loc, gh_weights = hermgauss(4)

    # Logit-normal parameters; these should work for arrays.
    lognorm_means = np.random.random((5, 3))
    lognorm_infos = np.random.random((5, 3))**2 + 1
    alpha = 2  # DP parameter

    # Draw samples.
    num_draws = 10**5
    samples = np.random.normal(lognorm_means, 1 / np.sqrt(lognorm_infos),
                               size = (num_draws, 5, 3))
    logit_norm_samples = sp.special.expit(samples)

    # Test the logit-normal means.
    np_test.assert_allclose(
        np.mean(logit_norm_samples, axis = 0),
        ef.get_e_logitnormal(
            lognorm_means, lognorm_infos, gh_loc, gh_weights),
        atol = 3 * np.std(logit_norm_samples) / np.sqrt(num_draws))

    # Test E[log(x)] and E[log(1 - x)].
    log_logistic_norm = np.mean(np.log(logit_norm_samples), axis = 0)
    log_1m_logistic_norm = np.mean(np.log(1 - logit_norm_samples), axis = 0)
    tol1 = 3 * np.std(np.log(logit_norm_samples)) / np.sqrt(num_draws)
    tol2 = 3 * np.std(np.log(1 - logit_norm_samples)) / np.sqrt(num_draws)
    np_test.assert_allclose(
        log_logistic_norm,
        ef.get_e_log_logitnormal(
            lognorm_means, lognorm_infos, gh_loc, gh_weights)[0],
        atol = tol1)
    np_test.assert_allclose(
        log_1m_logistic_norm,
        ef.get_e_log_logitnormal(
            lognorm_means, lognorm_infos, gh_loc, gh_weights)[1],
        atol = tol2)

    # Test the DP prior term.
    prior_samples = np.mean((alpha - 1) * np.log(1 - logit_norm_samples),
                            axis = 0)
    tol3 = 3 * np.std((alpha - 1) * np.log(1 - logit_norm_samples)) \
                / np.sqrt(num_draws)
    np_test.assert_allclose(
        prior_samples,
        ef.get_e_dp_prior_logitnorm_approx(
            alpha, lognorm_means, lognorm_infos, gh_loc, gh_weights),
        atol = tol3)

    # Check that gradients propagate through the quadrature.
    x = np.random.normal(0, 1e2, size = 10)
    def e_log_v(x):
        return np.sum(ef.get_e_log_logitnormal(
            x[0:5], np.abs(x[5:10]), gh_loc, gh_weights)[0])
    check_grads(e_log_v, order=2)(x)
def get_global_entropy(glmm_par):
    info_mu = glmm_par['mu']['info'].get()
    info_beta = glmm_par['beta']['info'].get()
    tau_shape = glmm_par['tau']['shape'].get()
    tau_rate = glmm_par['tau']['rate'].get()

    return \
        ef.univariate_normal_entropy(info_mu) + \
        ef.univariate_normal_entropy(info_beta) + \
        ef.gamma_entropy(tau_shape, tau_rate)
def test_dirichlet_entropy(self):
    alpha = np.array([23, 4, 5, 6, 7])
    dirichlet_dist = sp.stats.dirichlet(alpha)
    self.assertAlmostEqual(
        dirichlet_dist.entropy(), ef.dirichlet_entropy(alpha))

    # Columns of a 2d alpha should be treated as separate distributions.
    alpha_shape = (5, 2)
    alpha = 10 * np.random.random(alpha_shape)
    ef_entropy = ef.dirichlet_entropy(alpha)
    dirichlet_entropy = \
        [ sp.stats.dirichlet.entropy(alpha[:, k]) for k in range(2) ]
    np_test.assert_array_almost_equal(dirichlet_entropy, ef_entropy)
def get_log_prior(centroids, probs, prior_params):
    num_components = centroids.shape[0]
    obs_dim = centroids.shape[1]
    log_prior = 0

    log_probs = np.log(probs[0, :])
    log_prior += ef.dirichlet_prior(prior_params['probs_alpha'], log_probs)
    for k in range(num_components):
        log_prior += ef.mvn_prior(prior_params['centroid_prior_mean'][k, :],
                                  prior_params['centroid_prior_info'],
                                  centroids[k, :],
                                  np.zeros((obs_dim, obs_dim)))

    return log_prior
def test_mvn_entropy(self):
    mean_par = np.array([1., 2.])
    info_par = np.eye(2) + np.full((2, 2), 0.1)
    norm_dist = sp.stats.multivariate_normal(
        mean=mean_par, cov=np.linalg.inv(info_par))
    self.assertAlmostEqual(
        norm_dist.entropy(), ef.multivariate_normal_entropy(info_par))
def test_uvn_entropy(self):
    mean_par = 2.0
    info_par = 1.5
    norm_dist = sp.stats.norm(loc=mean_par, scale=np.sqrt(1 / info_par))
    self.assertAlmostEqual(
        norm_dist.entropy(), ef.univariate_normal_entropy(info_par))
def test_get_uvn_from_natural_parameters(self):
    true_mean = 1.5
    true_info = 0.4
    true_sd = 1 / np.sqrt(true_info)
    num_draws = 10000

    e_x = true_mean
    e_x2 = true_mean ** 2 + true_sd ** 2
    draws = np.random.normal(0, 1, num_draws)

    # The gradients of E[log p] with respect to E[x] and E[x^2] are the
    # natural parameters of the normal distribution.
    def get_log_normal_prob(e_x, e_x2):
        sd = np.sqrt(e_x2 - e_x ** 2)
        draws_shift = sd * draws + e_x
        log_pdf = -0.5 * true_info * (draws_shift - true_mean) ** 2
        return np.mean(log_pdf)

    get_log_normal_prob_grad_1 = grad(get_log_normal_prob, argnum=0)
    get_log_normal_prob_grad_2 = grad(get_log_normal_prob, argnum=1)
    e_x_term = get_log_normal_prob_grad_1(e_x, e_x2)
    e_x2_term = get_log_normal_prob_grad_2(e_x, e_x2)

    mean, info = ef.get_uvn_from_natural_parameters(e_x_term, e_x2_term)
    atol = 3 * true_sd / np.sqrt(num_draws)
    np_test.assert_allclose(true_mean, mean, atol=atol, err_msg='mean')
    np_test.assert_allclose(true_info, info, atol=atol, err_msg='info')
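# The identity being tested above: writing log p = -0.5 * info * x^2 +
# info * mean * x + const gives dE[log p]/dE[x] = info * mean and
# dE[log p]/dE[x^2] = -info / 2. A closed-form sketch of the inversion
# (an assumption about what ef.get_uvn_from_natural_parameters computes,
# not its actual source):
def _uvn_from_natural_parameters_sketch(e_x_term, e_x2_term):
    # Recover the information from the E[x^2] coefficient, then the mean.
    info = -2.0 * e_x2_term
    mean = e_x_term / info
    return mean, info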
def test_wishart_moments(self):
    num_draws = 10000
    df = 4.3
    v = np.diag(np.array([2., 3.])) + np.full((2, 2), 0.1)
    wishart_dist = sp.stats.wishart(df=df, scale=v)
    wishart_draws = wishart_dist.rvs(num_draws)
    log_det_draws = np.linalg.slogdet(wishart_draws)[1]
    moment_tolerance = 3.0 * np.std(log_det_draws) / np.sqrt(num_draws)
    print('Wishart e log det test tolerance: ', moment_tolerance)
    np_test.assert_allclose(
        np.mean(log_det_draws),
        ef.e_log_det_wishart(df, v),
        atol=moment_tolerance)

    # Test the log inverse diagonals.
    wishart_inv_draws = \
        [ np.linalg.inv(wishart_draws[n, :, :]) for n in range(num_draws) ]
    wishart_log_diag = \
        np.log([ np.diag(mat) for mat in wishart_inv_draws ])
    diag_mean = np.mean(wishart_log_diag, axis=0)
    diag_sd = np.std(wishart_log_diag, axis=0)
    moment_tolerance = 3.0 * np.max(diag_sd) / np.sqrt(num_draws)
    print('Wishart e log diag test tolerance: ', moment_tolerance)
    np_test.assert_allclose(
        diag_mean,
        ef.e_log_inv_wishart_diag(df, v),
        atol=moment_tolerance)

    # Test the LKJ prior.
    lkj_param = 5.5
    def get_r_matrix(mat):
        # Rescale a covariance matrix to a correlation matrix.
        mat_diag = np.diag(1. / np.sqrt(np.diag(mat)))
        return np.matmul(mat_diag, np.matmul(mat, mat_diag))
    wishart_log_det_r_draws = \
        np.array([ np.linalg.slogdet(get_r_matrix(mat))[1]
                   for mat in wishart_inv_draws ]) * (lkj_param - 1)
    moment_tolerance = \
        3.0 * np.std(wishart_log_det_r_draws) / np.sqrt(num_draws)
    print('Wishart lkj prior test tolerance: ', moment_tolerance)
    np_test.assert_allclose(
        np.mean(wishart_log_det_r_draws),
        ef.expected_ljk_prior(lkj_param, df, v),
        atol=moment_tolerance)
def get_e_func_logit_stick_vec(vb_params_dict, gh_loc, gh_weights, func):
    stick_propn_mean = vb_params_dict['stick_propn_mean']
    stick_propn_info = vb_params_dict['stick_propn_info']

    e_phi = np.array([
        ef.get_e_fun_normal(
            stick_propn_mean[k], stick_propn_info[k],
            gh_loc, gh_weights, func)
        for k in range(len(stick_propn_mean)) ])

    return e_phi
def get_log_prior(self):
    beta = self.glmm_par_draw['beta'].get()
    mu = self.glmm_par_draw['mu'].get()
    tau = self.glmm_par_draw['tau'].get()
    log_tau = np.log(tau)

    cov_beta = np.zeros((self.K, self.K))
    beta_prior_info = self.prior_par['beta_prior_info'].get()
    beta_prior_mean = self.prior_par['beta_prior_mean'].get()
    log_p_beta = ef.mvn_prior(
        beta_prior_mean, beta_prior_info, beta, cov_beta)
    log_p_mu = ef.uvn_prior(
        self.prior_par['mu_prior_mean'].get(),
        self.prior_par['mu_prior_info'].get(),
        mu, 0.0)
    tau_prior_shape = self.prior_par['tau_prior_alpha'].get()
    tau_prior_rate = self.prior_par['tau_prior_beta'].get()
    log_p_tau = ef.gamma_prior(
        tau_prior_shape, tau_prior_rate, tau, log_tau)

    return log_p_beta + log_p_mu + log_p_tau
def get_e_centroid_prior(centroids, prior_mean, prior_info):
    # Expected log prior for the cluster centroids. Note that the
    # variational distribution for the centroids is a Dirac delta, so the
    # observation variance passed to uvn_prior is zero.
    assert prior_info > 0

    beta_base_prior = ef.uvn_prior(prior_mean = prior_mean,
                                   prior_info = prior_info,
                                   e_obs = centroids.flatten(),
                                   var_obs = np.array([0.]))

    return np.sum(beta_base_prior)
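# A hypothetical usage sketch (not in the original source): with a Dirac
# delta variational distribution the expected log prior reduces to the
# normal log density evaluated at the centroids themselves. The centroid
# array and prior values below are illustrative.
def _example_centroid_prior():
    centroids = np.random.random((4, 2))   # 4 clusters in 2 dimensions
    return get_e_centroid_prior(centroids, prior_mean=0.0, prior_info=1.0)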
def get_e_logitnorm_dp_prior(stick_propn_mean, stick_propn_info, alpha,
                             gh_loc, gh_weights):
    # Expected log prior for the stick-breaking proportions under the
    # logit-normal variational distribution. Under a DP(alpha) prior each
    # stick is Beta(1, alpha), so up to a constant the expected log prior
    # is (alpha - 1) * E[log(1 - v)].
    # Integration is done numerically with Gauss-Hermite quadrature;
    # gh_loc and gh_weights specify the locations and weights of the
    # quadrature points.
    assert np.all(gh_weights > 0)
    assert stick_propn_mean.shape == stick_propn_info.shape
    assert np.all(stick_propn_info > 0)

    e_log_v, e_log_1mv = \
        ef.get_e_log_logitnormal(
            lognorm_means = stick_propn_mean,
            lognorm_infos = stick_propn_info,
            gh_loc = gh_loc,
            gh_weights = gh_weights)

    return (alpha - 1) * np.sum(e_log_1mv)
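# A Monte Carlo sanity check for get_e_logitnorm_dp_prior, mirroring the
# sampling pattern in test_logitnormal_moments. The helper name and draw
# count are illustrative assumptions, not part of the original code.
def _check_dp_prior_by_sampling(stick_propn_mean, stick_propn_info, alpha,
                                num_draws=10**5):
    from scipy.special import expit
    # Sample the sticks' logits, then map them back to (0, 1).
    draws = np.random.normal(
        stick_propn_mean, 1 / np.sqrt(stick_propn_info),
        size=(num_draws,) + stick_propn_mean.shape)
    sticks = expit(draws)
    # Should approximately match the quadrature value.
    return np.sum(np.mean((alpha - 1) * np.log(1 - sticks), axis=0))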
def get_e_log_cluster_probabilities(stick_propn_mean, stick_propn_info,
                                    gh_loc, gh_weights):
    # The expected log mixture weights.
    # stick_propn_mean is of shape ... x k_approx.
    assert np.all(gh_weights > 0)
    assert stick_propn_mean.shape == stick_propn_info.shape

    if len(stick_propn_mean.shape) == 1:
        stick_propn_mean = stick_propn_mean[None, :]
        stick_propn_info = stick_propn_info[None, :]
    assert np.all(stick_propn_info > 0)

    e_log_v, e_log_1mv = \
        ef.get_e_log_logitnormal(
            lognorm_means = stick_propn_mean,
            lognorm_infos = stick_propn_info,
            gh_loc = gh_loc,
            gh_weights = gh_weights)

    return get_e_log_cluster_probabilities_from_e_log_stick(e_log_v, e_log_1mv)
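# The helper above is referenced but not shown here. Under stick breaking,
# E[log pi_k] = E[log v_k] + sum_{j<k} E[log(1 - v_j)], with the final
# weight using only the (1 - v) terms. The sketch below follows that
# formula; treat it as an assumption about the helper, not its actual
# definition.
def _e_log_cluster_probabilities_from_e_log_stick_sketch(e_log_v, e_log_1mv):
    # e_log_v, e_log_1mv have shape ... x (k_approx - 1).
    zeros = np.zeros(e_log_v.shape[0:-1] + (1,))
    # Cumulative sums of E[log(1 - v)] give the "remaining stick" terms.
    cumsum_1mv = np.cumsum(e_log_1mv, axis=-1)
    return np.concatenate([e_log_v, zeros], axis=-1) + \
           np.concatenate([zeros, cumsum_1mv], axis=-1)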
def entropy(self):
    return ef.gamma_entropy(
        shape=self['shape'].get(),
        rate=self['rate'].get())
def e_log_lkj_inv_prior(self, lkj_param):
    return ef.expected_ljk_prior(
        lkj_param, self['df'].get(), self['v'].get())
def get_local_entropy(glmm_par):
    info_u = glmm_par['u']['info'].get()
    return ef.univariate_normal_entropy(info_u)
def e_log(self):
    return ef.get_e_log_dirichlet(self['alpha'].get())
def entropy(self):
    return ef.dirichlet_entropy(self['alpha'].get())
def e_log(self):
    return ef.get_e_log_gamma(
        shape=self['shape'].get(),
        rate=self['rate'].get())
def entropy(self):
    return ef.wishart_entropy(self['df'].get(), self['v'].get())
def get_kl(log_lik_by_nk, e_z, log_prior):
    # The negative ELBO per observation: expected log likelihood plus the
    # multinoulli entropy of the cluster assignments plus the log prior.
    num_obs = e_z.shape[0]
    return -1 * (np.sum(e_z * log_lik_by_nk) +
                 np.sum(ef.multinoulli_entropy(e_z)) +
                 log_prior) / num_obs
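# For reference, the multinoulli (categorical) entropy used above is, for
# each row of assignment probabilities, -sum_k e_z[n, k] * log e_z[n, k].
# A sketch consistent with that definition, assuming strictly positive
# probabilities (this is an assumption about ef.multinoulli_entropy, not
# its actual source):
def _multinoulli_entropy_sketch(e_z):
    return -1 * np.sum(e_z * np.log(e_z), axis=-1)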
def test_gamma_entropy(self):
    shape = 3.0
    rate = 2.4
    gamma_dist = sp.stats.gamma(a=shape, scale=1 / rate)
    self.assertAlmostEqual(
        gamma_dist.entropy(), ef.gamma_entropy(shape, rate))
def test_wishart_entropy(self):
    df = 4.3
    v = np.eye(2) + np.full((2, 2), 0.1)
    wishart_dist = sp.stats.wishart(df=df, scale=v)
    self.assertAlmostEqual(
        wishart_dist.entropy(), ef.wishart_entropy(df, v))
def test_beta_entropy(self):
    tau = np.array([[1, 2], [3, 4], [5, 6]])
    test_entropy = np.sum([sp.stats.beta.entropy(tau[i, 0], tau[i, 1])
                           for i in range(np.shape(tau)[0])])
    self.assertAlmostEqual(ef.beta_entropy(tau), test_entropy)
def get_e_cluster_probabilities(stick_propn_mean, stick_propn_info,
                                gh_loc, gh_weights):
    e_stick_lengths = \
        ef.get_e_logitnormal(stick_propn_mean, stick_propn_info,
                             gh_loc, gh_weights)

    return get_mixture_weights_from_stick_break_propns(e_stick_lengths)
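# The weight transform referenced above follows standard stick breaking:
# pi_k = v_k * prod_{j<k}(1 - v_j), with the final weight taking whatever
# stick length remains so the weights sum to one. A sketch under that
# assumption (not the original helper's source):
def _mixture_weights_from_stick_break_propns_sketch(stick_lengths):
    # stick_lengths has shape ... x (k_approx - 1).
    one = np.ones(stick_lengths.shape[0:-1] + (1,))
    # Product of (1 - v_j) over the sticks broken so far.
    remaining = np.cumprod(1 - stick_lengths, axis=-1)
    stick_remain = np.concatenate([one, remaining], axis=-1)
    stick_add = np.concatenate([stick_lengths, one], axis=-1)
    return stick_remain * stick_add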
def e_log_det(self):
    return ef.e_log_det_wishart(self['df'].get(), self['v'].get())