def lower_bound(weights, encode, decode_log_like, N_weights_enc, train_images,
                samples_per_image, latent_dimensions, rs):
    enc_w = weights[0:N_weights_enc]
    dec_w = weights[N_weights_enc:len(weights)]
    log_normal = build_logprob_mvn(np.zeros(latent_dimensions),
                                   np.eye(latent_dimensions), pseudo_inv=True)
    # Loop over images in train_images, drawing samples for each.
    for idx in xrange(train_images.shape[0]):
        x = train_images[idx, :]

        def log_lik_func(z):
            return decode_log_like(dec_w, z, x) + log_normal(z)

        # Use samples_per_image rather than a hardcoded single sample.
        sample, loglik_estimate, entropy_estimate = encode(
            enc_w, log_lik_func, rs, samples_per_image)
        if idx == 0:
            samples = sample
            loglik_estimates = loglik_estimate
            entropy_estimates = entropy_estimate
        else:
            samples = np.concatenate((samples, sample), axis=0)
            loglik_estimates = np.concatenate(
                (loglik_estimates, loglik_estimate), axis=0)
            entropy_estimates = np.concatenate(
                (entropy_estimates, entropy_estimate), axis=0)
    # Average over all images, not just the last one.
    loglik_estimate = np.mean(loglik_estimates)
    entropy_estimate = np.mean(entropy_estimates)
    print "ll average", loglik_estimate
    print "ent average", entropy_estimate
    return loglik_estimate + entropy_estimate
def test_spherical_mvn():
    D = 10
    mvn1 = build_logprob_mvn(np.zeros(D), np.eye(D))
    mvn2 = build_logprob_standard_normal(D)
    points = npr.randn(4, D)
    # Close the abs() before comparing, so we test |diff| < tol
    # rather than abs(boolean).
    assert np.all(np.abs(mvn1(points) - mvn2(points)) < 0.001),\
        "mvn1: {}, mvn2: {}".format(mvn1(points), mvn2(points))
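# For reference, minimal sketches of the two density builders exercised by
# the test above. Their signatures (including the pseudo_inv flag used
# elsewhere in these files) are inferred from the call sites, not from the
# actual definitions; the closed-form log-density itself is standard:
#   log N(x; mu, cov) = -D/2 log(2 pi) - 1/2 log|cov|
#                       - 1/2 (x - mu)^T cov^{-1} (x - mu)
def build_logprob_mvn(mean, cov, pseudo_inv=False):
    D = len(mean)
    # pinv guards against a singular covariance; a full treatment would
    # pair it with the pseudo-determinant and rank.
    prec = np.linalg.pinv(cov) if pseudo_inv else np.linalg.inv(cov)
    _, logdet = np.linalg.slogdet(cov)
    const = -0.5 * D * np.log(2 * np.pi) - 0.5 * logdet

    def logprob(x):
        diff = np.atleast_2d(x) - mean
        return const - 0.5 * np.sum(np.dot(diff, prec) * diff, axis=1)
    return logprob

def build_logprob_standard_normal(D):
    const = -0.5 * D * np.log(2 * np.pi)

    def logprob(x):
        return const - 0.5 * np.sum(np.atleast_2d(x) ** 2, axis=1)
    return logprob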
def lower_bound(weights, encode, decode_log_like, N_weights_enc, train_images,
                samples_per_image, latent_dimensions, rs):
    enc_w = weights[0:N_weights_enc]
    dec_w = weights[N_weights_enc:len(weights)]
    log_normal = build_logprob_mvn(np.zeros(latent_dimensions),
                                   np.eye(latent_dimensions), pseudo_inv=True)

    def log_lik_func(z):
        # train_images_repeat = np.repeat(train_images, samples_per_image, axis=0)
        assert z.shape[0] == train_images.shape[0]
        return decode_log_like(dec_w, z, train_images) + log_normal(z)

    samples, loglik_estimates, entropy_estimates = encode(
        enc_w, log_lik_func, rs, num_images=train_images.shape[0],
        samples_per_image=samples_per_image)
    loglik_estimate = np.mean(loglik_estimates)
    entropy_estimate = np.mean(entropy_estimates)
    print "ll average", loglik_estimate
    print "ent average", entropy_estimate
    return loglik_estimate + entropy_estimate
num_sampler_optimization_steps = 400
sampler_learn_rate = 0.01
init_init_stddev_scale = 1.0
init_langevin_stepsize = 0.1
init_langevin_noise_size = 0.001
init_gradient_power = 0.95

D = 2
init_mean = np.zeros(D)
init_stddevs = np.log(init_init_stddev_scale * np.ones((1, D)))
init_log_stepsizes = np.log(init_langevin_stepsize * np.ones(num_steps))
init_log_noise_sizes = np.log(init_langevin_noise_size * np.ones(num_steps))
init_log_gradient_scales = np.log(np.ones((1, D)))

# NB: logprob_mvn is built here but unused; the sampler below targets
# logprob_two_moons.
logprob_mvn = build_logprob_mvn(mean=np.array([0.2, 0.4]),
                                cov=np.array([[1.0, 0.9],
                                              [0.9, 1.0]]))
sample, parser = build_langevin_sampler(logprob_two_moons, D, num_steps,
                                        approx=False)

sampler_params = np.zeros(len(parser))
parser.put(sampler_params, 'mean', init_mean)
parser.put(sampler_params, 'log_stddev', init_stddevs)
parser.put(sampler_params, 'log_stepsizes', init_log_stepsizes)
parser.put(sampler_params, 'log_noise_sizes', init_log_noise_sizes)
parser.put(sampler_params, 'log_gradient_scales', init_log_gradient_scales)
parser.put(sampler_params, 'invsig_gradient_power',
           inv_sigmoid(init_gradient_power))

def get_batch_marginal_likelihood_estimate(sampler_params):
    samples, likelihood_estimates, entropy_estimates = sample(
        sampler_params, rs, num_samples)
    print "Mean loglik:", np.mean(likelihood_estimates.value),\
        "Mean entropy:", np.mean(entropy_estimates.value)
    plot_density(samples.value, "approximating_dist.png")
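# inv_sigmoid is not defined in this fragment; a minimal sketch, assuming it
# is the standard logit (the inverse of sigmoid(x) = 1 / (1 + exp(-x))):
def inv_sigmoid(p):
    return np.log(p) - np.log(1.0 - p)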
# with open('parameters.pkl') as f:
#     parameters = pickle.load(f)

with open('mnist_models.pkl') as f:
    trained_weights, all_mean, all_cov = pickle.load(f)
banded_cov = create_banded_cov(all_cov.shape[0], 100)

# Build likelihood model.
L2_reg = 1
layer_sizes = [784, 200, 100, 10]
num_weights, make_predictions, likelihood = make_classification_nn(layer_sizes)
classifier_loglik = lambda image, c: make_predictions(
    trained_weights, np.atleast_2d(image))[:, c]
image_prior = build_logprob_mvn(all_mean, banded_cov)

# Combine prior and likelihood.
model_ll = lambda image, c: image_prior(image) + classifier_loglik(image, c)

def model_nll(image, c):
    return -1 * model_ll(image, c)

model_nll_with_grad = value_and_grad(model_nll)

# Optimize a random image to maximize this likelihood.
cur_class = 1
start_image = np.ones((28 * 28))  # all_mean
# quick_grad_check(data_L, start_image)
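# create_banded_cov is not defined in this fragment. One plausible reading,
# given the call create_banded_cov(all_cov.shape[0], 100), is that it bands
# the empirical covariance: entries more than `bandwidth` indices off the
# diagonal are zeroed. A hypothetical standalone version of that idea:
def band_covariance(cov, bandwidth):
    D = cov.shape[0]
    idx = np.arange(D)
    # Boolean mask that keeps only entries within `bandwidth` of the diagonal.
    mask = np.abs(idx[:, None] - idx[None, :]) <= bandwidth
    return cov * mask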
init_langevin_noise_size = 0.01
init_gradient_power = 0.5

with open('mnist_models.pkl') as f:
    trained_weights, all_mean, all_cov = pickle.load(f)

D = 28 * 28

# Regularize all_cov.
prior_relax = 0.05
all_cov = all_cov + prior_relax * np.eye(D)
print "True normalizing constant:", log_normalizing_constant_of_a_guassian(all_cov)
print "True entropy:", entropy_of_a_gaussian(all_cov)
prior_func = build_logprob_mvn(all_mean, all_cov)

init_mean = all_mean
init_stddevs = np.log(init_init_stddev_scale * np.ones((1, D)))
init_log_stepsizes = np.log(init_langevin_stepsize * np.ones(num_langevin_steps))
init_log_noise_sizes = np.log(init_langevin_noise_size * np.ones(num_langevin_steps))
init_log_gradient_scales = np.log(np.ones((1, D)))

sample_and_run_langevin, parser = build_langevin_sampler(
    prior_func, D, num_langevin_steps, approx=True)

sampler_params = np.zeros(len(parser))
parser.put(sampler_params, 'mean', init_mean)
parser.put(sampler_params, 'log_stddev', init_stddevs)
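# Neither helper printed above is defined in this fragment; for a
# D-dimensional Gaussian with covariance cov both have standard closed
# forms, sketched here (keeping the original, misspelled identifier so the
# call above would resolve):
def log_normalizing_constant_of_a_guassian(cov):
    # log Z = D/2 * log(2*pi) + 1/2 * log|cov|
    D = cov.shape[0]
    _, logdet = np.linalg.slogdet(cov)
    return 0.5 * D * np.log(2 * np.pi) + 0.5 * logdet

def entropy_of_a_gaussian(cov):
    # H = D/2 * (1 + log(2*pi)) + 1/2 * log|cov|
    D = cov.shape[0]
    _, logdet = np.linalg.slogdet(cov)
    return 0.5 * D * (1.0 + np.log(2 * np.pi)) + 0.5 * logdet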
def run_variational_network(train_images, N_weights_dec, decoder,
                            decoder_log_like, trained_weights, all_mean):
    start_time = time.time()

    # Create aevb function.
    # Training parameters
    D = train_images.shape[1]
    enc_layers = [D, hidden_units, hidden_units, 2 * latent_dimensions]
    N_weights_enc, encoder, encoder_log_like = make_gaussian_nn(enc_layers)

    # Optimize aevb.
    batch_size = 10
    num_training_iters = 1600
    rs = npr.RandomState(0)

    parser = WeightsParser()
    parser.add_shape('encoding weights', (N_weights_enc,))
    initial_enc_w = rs.randn(len(parser)) * param_scale

    batch_idxs = make_batches(train_images.shape[0], batch_size)
    banded_cov = create_banded_cov(all_cov.shape[0], 10)
    log_prior = build_logprob_mvn(all_mean, banded_cov)

    def batch_value_and_grad(enc_w, iter):
        iter = iter % len(batch_idxs)
        cur_data = train_images[batch_idxs[iter]]
        return enc_lower_bound(enc_w, trained_weights, encoder,
                               decoder_log_like, log_prior, N_weights_enc,
                               cur_data, samples_per_image,
                               latent_dimensions, rs)

    lb_grad = grad(batch_value_and_grad)

    def callback(params, i, grad):
        ml = batch_value_and_grad(params, i)
        print "log marginal likelihood:", ml

        # Generate samples.
        num_samples = 100
        images_per_row = 10
        # zs = train_images[0:100, :]
        zs = np.zeros((100, 10))
        zs[:, 1] = .5
        zs[:, 5] = .5
        (mus, log_sigs) = encoder(params, zs)
        # sigs = np.exp(log_sigs)
        # noise = rs.randn(1, 100, 784)
        # samples = mus + sigs * noise
        # samples = np.reshape(samples, (100 * 1, 784), order='F')
        samples = mus
        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(samples, ax, ims_per_row=images_per_row)
        plt.savefig('samples.png')

    final_params = adam(lb_grad, initial_enc_w, num_training_iters,
                        callback=callback)

    # Report the runtime before returning (in the original this code sat
    # unreachably after the return statement).
    finish_time = time.time()
    print "total runtime", finish_time - start_time

    # The decoder weights stay fixed during training, so close over the
    # trained decoder weights rather than reading them from the encoder
    # parser (which only holds 'encoding weights').
    def decoder_with_weights(zs):
        return decoder(trained_weights, zs)
    return decoder_with_weights
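# make_batches is not defined in this fragment; a minimal slice-based sketch,
# consistent with the train_images[batch_idxs[iter]] indexing above:
def make_batches(N_data, batch_size):
    # Return a list of slice objects covering 0..N_data in batch_size chunks.
    return [slice(i, min(i + batch_size, N_data))
            for i in range(0, N_data, batch_size)]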
if __name__ == '__main__':
    t0 = time.time()
    num_samples = 100
    num_sampler_optimization_steps = 200
    sampler_learn_rate = .2
    D = 2

    init_mean = np.zeros(D)
    init_log_stddevs = np.log(1.0 * np.ones(D))
    init_log_stepsize = np.log(0.01)

    rs = npr.RandomState(0)
    logprob_mvn = build_logprob_mvn(np.zeros(2), np.array([[1, .9],
                                                           [.9, 1]]))

    sample_and_run_early_stop, parser = build_early_stop(D, approx=False)
    sampler_params = np.zeros(len(parser))
    parser.put(sampler_params, 'mean', init_mean)
    parser.put(sampler_params, 'log_stddev', init_log_stddevs)
    parser.put(sampler_params, 'log_stepsize', init_log_stepsize)

    def get_batch_marginal_likelihood_estimate(sampler_params):
        for i in xrange(num_samples):
            sample, loglik_estimate, entropy_estimate = sample_and_run_early_stop(
                sampler_params, logprob_mvn, rs, 1)
            if i == 0:
                samples = sample
                loglik_estimates = loglik_estimate
                entropy_estimates = entropy_estimate
            else:
                samples = np.concatenate((samples, sample), axis=0)
                loglik_estimates = np.concatenate(
                    (loglik_estimates, loglik_estimate), axis=0)
                entropy_estimates = np.concatenate(
                    (entropy_estimates, entropy_estimate), axis=0)
num_sampler_optimization_steps = 400
sampler_learn_rate = 1e-5
D = 2

init_mean = np.zeros(D)
init_log_stddevs = np.log(.1 * np.ones(D))
hmc_log_stepsize = np.log(.1)
mass_mat = np.eye(D)
v_A = np.zeros(D)
v_B = np.zeros(D)
v_log_cov = np.log(.01 * np.ones(D))
rev_A = np.zeros(D)
rev_B = np.zeros(D)
rev_log_cov = np.log(.01 * np.ones(D))

logprob_mvn = build_logprob_mvn(mean=np.array([0.0, 0.0]),
                                cov=np.array([[1.0, 0.9],
                                              [0.9, 1.0]]), pseudo_inv=False)

hmc_sample, parser = build_hmc_sampler(logprob_mvn, D, num_steps, leap_steps)
sampler_params = np.zeros(len(parser))
parser.put(sampler_params, 'mean', init_mean)
parser.put(sampler_params, 'log_stddev', init_log_stddevs)
parser.put(sampler_params, 'hmc_log_stepsize', hmc_log_stepsize)
parser.put(sampler_params, 'mass_mat', mass_mat)
parser.put(sampler_params, 'v_A', v_A)
parser.put(sampler_params, 'v_B', v_B)
parser.put(sampler_params, 'v_log_cov', v_log_cov)
parser.put(sampler_params, 'rev_A', rev_A)
parser.put(sampler_params, 'rev_B', rev_B)
parser.put(sampler_params, 'rev_log_cov', rev_log_cov)

def get_batch_marginal_likelihood_estimate(sampler_params):
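    # The body of get_batch_marginal_likelihood_estimate is cut off in this
    # fragment.

# For reference, a minimal sketch of the optimization driver that the
# hyperparameters above (num_sampler_optimization_steps, sampler_learn_rate)
# suggest, assuming an autograd-style grad() and that the estimator returns
# a scalar marginal-likelihood lower bound to be maximized:
from autograd import grad

def optimize_sampler_params(sampler_params):
    objective_grad = grad(get_batch_marginal_likelihood_estimate)
    for step in xrange(num_sampler_optimization_steps):
        # Plain gradient ascent on the lower bound.
        sampler_params = sampler_params + \
            sampler_learn_rate * objective_grad(sampler_params)
    return sampler_params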