def update_nn(init_var_params, batch_data, batch_labels):
    log_posterior = lambda weights, t: logprob(weights, batch_data, batch_labels)

    # Build variational objective.
    objective, gradient, unpack_params = \
        black_box_variational_inference(log_posterior, num_weights,
                                        num_samples=20)

    variational_params = adam(gradient, init_var_params,
                              step_size=0.01, num_iters=50)
    return variational_params

def update_nn(init_var_params, batch_data, batch_labels, iteration):
    log_posterior = lambda weights, t: logprob(weights, batch_data, batch_labels)

    # Build variational objective.
    objective, gradient, unpack_params = \
        black_box_variational_inference(log_posterior, num_weights,
                                        num_samples=20)

    variational_params = adam(gradient, init_var_params,
                              step_size=0.1, num_iters=10)
    return variational_params, objective

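# A minimal usage sketch for the minibatch variant above (illustrative only:
# the `batches` iterable and the zero-mean / log_std = -5 initialization are
# assumptions, not part of the original code; the variational parameter
# vector stacks num_weights means followed by num_weights log-stds).
var_params = np.concatenate([np.zeros(num_weights),       # initial variational means
                             -5 * np.ones(num_weights)])  # initial variational log-stds
for i, (batch_data, batch_labels) in enumerate(batches):
    var_params, objective = update_nn(var_params, batch_data, batch_labels, i)
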
coverage_df[input_idx, :] = 1  # indicate the training data index

for b in range(B):
    print(b)

    # Specify inference problem by its unnormalized log-posterior.
    rbf = lambda x: np.exp(-x**2)
    num_weights, predictions, logprob = \
        make_nn_funs(layer_sizes=[1, 2, 2, 1], L2_reg=0.1,
                     noise_variance=0.01, nonlinearity=rbf)

    inputs, targets, tot_inputs, tot_targets, input_idx = \
        build_toy_dataset(n_data=40, noise_std=tau, type=t)
    log_posterior = lambda weights, t: logprob(weights, inputs, targets)

    # Build variational objective.
    objective, gradient, unpack_params = \
        black_box_variational_inference(log_posterior, num_weights,
                                        num_samples=20)

    # Set up figure.
    fig = plt.figure(figsize=(12, 8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.ion()
    plt.show(block=False)

    def callback(params, t, g):
        print("Iteration {} lower bound {}".format(t, -objective(params, t)))

        # Sample functions from posterior.
        rs = npr.RandomState(0)
        mean, log_std = unpack_params(params)
if __name__ == '__main__':

    # Specify inference problem by its unnormalized log-posterior.
    rbf = lambda x: norm.pdf(x, 0, 1)
    num_weights, predictions, logprob = \
        make_nn_funs(layer_sizes=[1, 20, 1], L2_reg=0.01,
                     noise_variance=0.01, nonlinearity=rbf)

    inputs, targets = build_toy_dataset()
    log_posterior = lambda weights, t: logprob(weights, inputs, targets)

    # Build variational objective.
    objective, gradient, unpack_params = \
        black_box_variational_inference(log_posterior, num_weights,
                                        num_samples=20)

    # Set up figure.
    fig = plt.figure(figsize=(8, 8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.ion()
    plt.show(block=False)

    def callback(params, t, g):
        print("Iteration {} lower bound {}".format(t, -objective(params, t)))

        # Sample functions from the posterior; unpack_params returns the
        # variational mean and log-std, so weights are drawn as
        # mean + exp(log_std) * standard normal noise.
        mean, log_std = unpack_params(params)
        rs = npr.RandomState(0)
        sample_weights = rs.randn(10, num_weights) * np.exp(log_std) + mean
        plot_inputs = np.linspace(-8, 8, num=200)
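        # A sketch of how these weight samples might be visualized (an
        # assumption about the rest of this callback: predictions(weights,
        # inputs) is taken to map a stack of weight samples and (N, 1)
        # inputs to one output curve per sample).
        outputs = predictions(sample_weights, np.expand_dims(plot_inputs, 1))
        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), 'bx')  # training data
        ax.plot(plot_inputs, outputs[:, :, 0].T)        # sampled posterior functions
        plt.draw()
        plt.pause(1.0 / 60.0)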
# Specify an inference problem by its unnormalized log-density.
# It's difficult to see the benefit in low dimensions;
# the model parameters are a mean and a log_sigma.
np.random.seed(42)
obs_dim = 20
Y = np.random.randn(obs_dim, obs_dim).dot(np.random.randn(obs_dim))

def log_density(x, t):
    mu, log_sigma = x[:, :obs_dim], x[:, obs_dim:]
    sigma_density = np.sum(norm.logpdf(log_sigma, 0, 1.35), axis=1)
    mu_density = np.sum(norm.logpdf(Y, mu, np.exp(log_sigma)), axis=1)
    return sigma_density + mu_density

# Build variational objective.
D = obs_dim * 2  # dimension of our posterior
objective, gradient, unpack_params = \
    black_box_variational_inference(log_density, D, num_samples=2000)

# Define the natural gradient.
# The natural gradient of the ELBO is the gradient of the ELBO,
# preconditioned by the inverse Fisher information matrix. The Fisher,
# in the case of a diagonal Gaussian, is a diagonal matrix that is a
# simple function of the variance. Intuitively, the statistical distance
# created by perturbing the mean of an independent Gaussian is determined
# by how wide the distribution is along that dimension: the wider the
# distribution, the less sensitive the statistical distance is to
# perturbations of the mean; the narrower the distribution, the more the
# statistical distance changes when you perturb the mean. (Imagine an
# extremely narrow Gaussian, basically a spike: the KL between this
# Gaussian and a Gaussian $\epsilon$ away in location can be large,
# since moving the Gaussian could significantly reduce the overlap in
# support, which corresponds to a greater statistical distance.)
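
# A minimal sketch of that preconditioning (assuming, as in the other
# examples here, that unpack_params splits the variational parameters into
# a mean vector and a log-std vector): the Fisher information of a diagonal
# Gaussian parameterized by (mean, log_sigma) is 1/sigma^2 = exp(-2*log_sigma)
# in each mean coordinate and the constant 2 in each log_sigma coordinate,
# so the natural gradient is the ordinary gradient divided elementwise by
# that diagonal.
def fisher_diag(lam):
    mean, log_std = unpack_params(lam)
    return np.concatenate([np.exp(-2.0 * log_std),        # 1 / sigma^2 (mean block)
                           2.0 * np.ones(len(log_std))])  # constant 2 (log_sigma block)

natural_gradient = lambda lam, i: gradient(lam, i) / fisher_diag(lam)

# The preconditioned gradient can be fed to the same optimizer used elsewhere
# in this file, e.g. adam(natural_gradient, init_params, step_size=..., num_iters=...).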