# Shared imports for the scripts in this section. The autopaint-specific helpers used below
# (WeightsParser, make_binary_nn, make_gaussian_nn, make_batches, lower_bound, adam,
# plot_images, build_early_stop_fixed_params, build_flow_sampler, init_flow_params,
# get_pretrained_dec_w, load_and_pickle_binary_mnist) and the module-level constants
# (latent_dimensions, hidden_units, param_scale, samples_per_image, num_flow_steps) come from
# the surrounding project and are assumed to be defined elsewhere; only standard imports are
# listed here.
import time
import pickle

import matplotlib.pyplot as plt
import numpy.random as npr
import autograd.numpy as np
from autograd import grad


# Variant 1: AEVB where the recognition model is an early-stopped sampler with fixed
# parameters, so only the decoder weights are trained.
def run_aevb(train_images):
    start_time = time.time()

    # Training parameters.
    D = train_images.shape[1]
    dec_layers = [latent_dimensions, hidden_units, hidden_units, D]

    # Fixed parameters of the early-stopping sampler used in place of a learned encoder.
    mean = np.zeros(latent_dimensions)
    log_stddevs = np.log(1.0 * np.ones(latent_dimensions))
    log_stepsize = np.log(.005)
    rs = npr.RandomState(0)
    sample_and_run_es = build_early_stop_fixed_params(
        latent_dimensions, approx=True, mean=mean,
        log_stddevs=log_stddevs, log_stepsize=log_stepsize)

    N_weights_dec, decoder, decoder_log_like = make_binary_nn(dec_layers)
    N_weights_enc = 0  # The sampler-based encoder has no trainable weights.
    encoder = sample_and_run_es

    # Build parser for the trainable (decoder-only) weights.
    parser = WeightsParser()
    parser.add_shape('decoding weights', (N_weights_dec,))
    params = np.zeros(len(parser))
    parser.put(params, 'decoding weights', rs.randn(N_weights_dec) * param_scale)
    assert len(parser) == N_weights_dec

    # Optimize aevb.
    batch_size = 1
    num_training_iters = 1600
    rs = npr.RandomState(0)
    batch_idxs = make_batches(train_images.shape[0], batch_size)

    def batch_value_and_grad(weights, iter):
        iter = iter % len(batch_idxs)
        cur_data = train_images[batch_idxs[iter]]
        return lower_bound(weights, encoder, decoder_log_like, N_weights_enc,
                           cur_data, samples_per_image, latent_dimensions, rs)

    lb_grad = grad(batch_value_and_grad)

    def callback(params, i, grad):
        # Report the lower bound, averaged here over a single minibatch.
        n_iter = 0.0
        sum_ml = 0
        for j in xrange(0, 1):
            ml = batch_value_and_grad(params, j)
            print "---- log marginal likelihood:", ml
            n_iter += 1
            sum_ml += ml
        print '-------- avg_ml', sum_ml / n_iter

        # Generate samples from the current decoder.
        num_samples = 100
        images_per_row = 10
        zs = rs.randn(num_samples, latent_dimensions)
        # samples = np.random.binomial(1, decoder(parser.get(params, 'decoding weights'), zs))
        samples = decoder(parser.get(params, 'decoding weights'), zs)
        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(samples, ax, ims_per_row=images_per_row)
        plt.savefig('samples.png')

    final_params = adam(lb_grad, params, num_training_iters, callback=callback)

    finish_time = time.time()
    print "total runtime", finish_time - start_time

    def decoder_with_weights(zs):
        return decoder(parser.get(final_params, 'decoding weights'), zs)
    return decoder_with_weights
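# Illustrative helper (not in the original script): drawing images from the decoder returned
# by the variant above. The commented-out np.random.binomial line in its callback suggests the
# decoder outputs Bernoulli means, so the optional binarization below is an assumption about
# intended usage, not confirmed behaviour.
def sample_from_decoder(decoder_with_weights, num_samples=100, binarize=False, seed=1):
    zs = npr.RandomState(seed).randn(num_samples, latent_dimensions)  # prior draws z ~ N(0, I)
    bernoulli_means = decoder_with_weights(zs)
    if binarize:
        return npr.binomial(1, bernoulli_means)  # sample binary pixels
    return bernoulli_means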
# Variant 2 (apparently from a separate script, since it redefines run_aevb): standard AEVB
# with a Gaussian recognition network and a pretrained decoder initialization.
def run_aevb(train_images):
    start_time = time.time()

    # Training parameters.
    D = train_images.shape[1]
    enc_layers = [D, hidden_units, hidden_units, 2 * latent_dimensions]
    dec_layers = [latent_dimensions, hidden_units, hidden_units, D]
    N_weights_enc, encoder, encoder_log_like = make_gaussian_nn(enc_layers)
    N_weights_dec, decoder, decoder_log_like = make_binary_nn(dec_layers)

    # Optimize aevb.
    batch_size = 100
    num_training_iters = 1600
    rs = npr.RandomState(0)

    parser = WeightsParser()
    parser.add_shape('encoding weights', (N_weights_enc,))
    # At this point len(parser) == N_weights_enc, so this initializes only the encoder weights.
    initial_combined_weights = rs.randn(len(parser)) * param_scale
    parser.add_shape('decoding weights', (N_weights_dec,))
    params = np.zeros(len(parser))
    dec_w = get_pretrained_dec_w()
    parser.put(params, 'encoding weights', initial_combined_weights)
    parser.put(params, 'decoding weights', dec_w)

    batch_idxs = make_batches(train_images.shape[0], batch_size)

    def batch_value_and_grad(weights, iter):
        iter = iter % len(batch_idxs)
        cur_data = train_images[batch_idxs[iter]]
        return lower_bound(weights, encoder, decoder_log_like, N_weights_enc,
                           cur_data, samples_per_image, latent_dimensions, rs)

    lb_grad = grad(batch_value_and_grad)

    def callback(params, i, grad):
        ml = batch_value_and_grad(params, i)
        print "log marginal likelihood:", ml

        # Generate samples from the current decoder.
        num_samples = 100
        images_per_row = 10
        zs = rs.randn(num_samples, latent_dimensions)
        samples = decoder(parser.get(params, 'decoding weights'), zs)
        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(samples, ax, ims_per_row=images_per_row)
        plt.savefig('samples.png')

    final_params = adam(lb_grad, params, num_training_iters, callback=callback)

    finish_time = time.time()
    print "total runtime", finish_time - start_time

    def decoder_with_weights(zs):
        return decoder(parser.get(final_params, 'decoding weights'), zs)
    return decoder_with_weights
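# Hypothetical driver (not in the original excerpt): loads the binarized MNIST pickle that
# time_and_acc below reads, trains the Gaussian-encoder variant of run_aevb, and saves a grid
# of samples from the trained decoder. The file path and unpacking mirror time_and_acc; the
# rest is an illustrative sketch, not the authors' experiment script.
def demo_run_aevb():
    with open('../../../autopaint/mnist_binary_data.pkl') as f:
        N_data, train_images, train_labels, test_images, test_labels = pickle.load(f)
    trained_decoder = run_aevb(train_images)
    zs = npr.RandomState(2).randn(100, latent_dimensions)
    fig = plt.figure(2)
    fig.clf()
    plot_images(trained_decoder(zs), fig.add_subplot(111), ims_per_row=10)
    plt.savefig('final_samples.png')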
def time_and_acc(latent_dimension):
    start_time = time.time()
    rs = npr.RandomState(0)

    # load_and_pickle_binary_mnist()
    with open('../../../autopaint/mnist_binary_data.pkl') as f:
        N_data, train_images, train_labels, test_images, test_labels = pickle.load(f)

    D = train_images.shape[1]
    enc_layer_sizes = [D, hidden_units, 2 * latent_dimension]
    dec_layer_sizes = [latent_dimension, hidden_units, D]
    N_weights_enc, encoder, encoder_log_like = make_gaussian_nn(enc_layer_sizes)
    N_weights_dec, decoder, decoder_log_like = make_binary_nn(dec_layer_sizes)

    # Optimization parameters.
    batch_size = 100
    num_training_iters = 100
    sampler_learn_rate = 0.01
    batch_idxs = make_batches(train_images.shape[0], batch_size)

    init_enc_w = rs.randn(N_weights_enc) * param_scale
    init_dec_w = rs.randn(N_weights_dec) * param_scale
    flow_sampler, flow_parser = build_flow_sampler(latent_dimension, num_flow_steps)

    combined_parser = WeightsParser()
    combined_parser.add_shape('encoder weights', N_weights_enc)
    combined_parser.add_shape('decoder weights', N_weights_dec)
    combined_parser.add_shape('flow params', len(flow_parser))

    combined_params = np.zeros(len(combined_parser))
    combined_parser.put(combined_params, 'encoder weights', init_enc_w)
    combined_parser.put(combined_params, 'flow params',
                        init_flow_params(flow_parser, rs, latent_dimension))
    combined_parser.put(combined_params, 'decoder weights', init_dec_w)

    def get_batch_lower_bound(cur_params, iter):
        encoder_weights = combined_parser.get(cur_params, 'encoder weights')
        flow_params = combined_parser.get(cur_params, 'flow params')
        decoder_weights = combined_parser.get(cur_params, 'decoder weights')

        cur_data = train_images[batch_idxs[iter]]
        mus, log_sigs = encoder(encoder_weights, cur_data)
        samples, entropy_estimates = flow_sampler(flow_params, mus, np.exp(log_sigs), rs)
        loglikes = decoder_log_like(decoder_weights, samples, cur_data)

        print "Iter", iter, "loglik:", np.mean(loglikes).value, \
            "entropy:", np.mean(entropy_estimates).value, \
            "marg. like:", np.mean(entropy_estimates + loglikes).value

        # Cache the latest lower-bound estimate so it can be read back after training.
        lastVal = np.mean(entropy_estimates + loglikes).value
        with open('lastVal.pkl', 'w') as f:
            pickle.dump(lastVal, f, 1)
        return np.mean(entropy_estimates + loglikes)

    lb_grad = grad(get_batch_lower_bound)

    def callback(weights, iter, grad):
        # Generate samples from the current decoder.
        num_samples = 100
        zs = rs.randn(num_samples, latent_dimension)
        samples = decoder(combined_parser.get(weights, 'decoder weights'), zs)
        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(samples, ax, ims_per_row=10)
        plt.savefig('samples.png')

    final_params = adam(lb_grad, combined_params, num_training_iters, callback=callback)
    finish_time = time.time()

    # Broken and very mysterious:
    # lb_val_grad = value_and_grad(get_batch_lower_bound)
    # lb_est = lb_val_grad(final_params, num_training_iters + 2)
    # print lb_est
    # lb_est = lb_est[0]

    # Instead, read back the last lower-bound value cached by get_batch_lower_bound.
    with open('lastVal.pkl') as f:
        lb_est = pickle.load(f)
    print 'lb_est is', lb_est

    print "Total training time:", finish_time - start_time
    return finish_time, lb_est
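# Hypothetical sweep (not in the original excerpt): time_and_acc returns (finish_time, lb_est),
# so one natural use is to compare the final lower-bound estimate across latent
# dimensionalities. The dimensions listed below are illustrative only.
if __name__ == '__main__':
    results = {}
    for latent_dim in (2, 5, 10, 20):
        results[latent_dim] = time_and_acc(latent_dim)
    for latent_dim in sorted(results):
        finish_time, lb_est = results[latent_dim]
        print "latent dim", latent_dim, "lower-bound estimate", lb_est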