def run(): print "Running experiment..." results = defaultdict(list) for i in xrange(N_samples): print i, def callback(x, t, v, entropy, log_decay): results[("x", i)].append( x.copy()) # Replace this with a loop over kwargs? results[("entropy", i)].append(entropy) results[("velocity", i)].append(v) results[("log decay", i)].append(log_decay[0]) results[("likelihood", i)].append(-nllfun(x)) rs = RandomState((seed, i)) x0 = rs.randn(D) * init_scale v0 = rs.randn(D) # TODO: account for entropy of init. init_entropy = 0.5 * D * (1 + np.log(2*np.pi))\ + np.sum(np.log(init_scale)) aed3(gradfun, callback=callback, x=x0, v=v0, learn_rate=epsilon, iters=N_iter, init_log_decay=init_log_decay, decay_learn_rate=decay_learn_rate, entropy=init_entropy) return results
def run(): print "Running experiment..." sgd_optimized_points = [] for i in xrange(N_samples): rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale sgd_optimized_points.append( sgd(grad(nllfunt), x=x0, v=v0, learn_rate=alpha, decay=decay, iters=N_iter)) return sgd_optimized_points, entropy
def run():
    (train_images, train_labels), \
        (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes)
    N_param = len(parser.vect)
    print "Running experiment..."
    results = defaultdict(list)
    for i in xrange(N_samples):

        def indexed_loss_fun(w, i_iter):
            rs = RandomState((seed, i, i_iter))
            idxs = rs.randint(N_train, size=batch_size)
            nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
            nlp = neg_log_prior(w)
            return nll + nlp

        gradfun = grad(indexed_loss_fun)

        def callback(x, t, v, entropy):
            results[("entropy", i)].append(entropy / N_train)
            results[("v_norm", i)].append(norm(v) / np.sqrt(N_param))
            results[("minibatch_likelihood", i)].append(-indexed_loss_fun(x, t))
            results[("log_prior_per_dpt", i)].append(-neg_log_prior(x) / N_train)
            if t % thin != 0 and t != N_iter and t != 0:
                return
            results[("iterations", i)].append(t)
            results[("train_likelihood", i)].append(-nllfun(x, train_images, train_labels))
            results[("tests_likelihood", i)].append(-nllfun(x, tests_images, tests_labels))
            results[("tests_error", i)].append(frac_err(x, tests_images, tests_labels))
            print "Iteration {0:5} Train likelihood {1:2.4f} Test likelihood {2:2.4f}" \
                  " Test Err {3:2.4f}".format(t, results[("train_likelihood", i)][-1],
                                              results[("tests_likelihood", i)][-1],
                                              results[("tests_error", i)][-1])

        rs = RandomState((seed, i))
        x0 = rs.randn(N_param) * init_scale
        v0 = rs.randn(N_param)
        # TODO: account for entropy of init.
        # entropy = 0.5 * D * (1 + np.log(2*np.pi)) + np.sum(np.log(x_scale)) \
        #           + 0.5 * (D - norm(v)**2)
        aed3(gradfun, callback=callback, x=x0, v=v0, learn_rate=epsilon,
             iters=N_iter, init_log_decay=init_log_decay,
             decay_learn_rate=decay_learn_rate)
    return results
def test_approx_log_det():
    D = 100
    rs = RandomState(0)
    mat = np.eye(D) - 0.1 * np.diag(rs.rand(D))
    mvp = lambda v: np.dot(mat, v)
    N_trials = 10000
    approx = 0
    for i in xrange(N_trials):
        approx += approx_log_det(mvp, D, rs)
    approx = approx / N_trials
    exact = exact_log_det(mvp, D)
    assert exact > approx > (exact - 0.1 * np.abs(exact))
    print exact, approx
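# Background on the estimator under test: averaging many cheap single-probe
# estimates, as above, is the pattern of Hutchinson-style randomized trace
# estimation (and log det(A) = tr(log A) reduces log-determinants to traces).
# The sketch below shows the plain trace version of that idea; it is NOT the
# implementation of approx_log_det, whose definition isn't shown here.
import numpy as np
from numpy.random import RandomState

def hutchinson_trace(mvp, D, rs):
    # E[v^T A v] = tr(A) when v has i.i.d. +/-1 (Rademacher) entries.
    v = rs.randint(0, 2, D) * 2.0 - 1.0
    return np.dot(v, mvp(v))

rs = RandomState(0)
mat = np.eye(100) - 0.1 * np.diag(rs.rand(100))
mvp = lambda v: np.dot(mat, v)
approx = np.mean([hutchinson_trace(mvp, 100, rs) for _ in xrange(10000)])
print approx, np.trace(mat)   # unbiased: the two agree closely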
def run(): print "Running experiment..." sgd_optimized_points = [] ed_optimized_points = [] aed_optimized_points = [] asgd_optimized_points = [] for i in xrange(N_samples): rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale sgd_optimized_points.append( sgd(grad(nllfunt), x=x0, v=v0, learn_rate=alpha, decay=decay, iters=N_iter)) rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale ed_optimized_points.append( entropic_descent(grad(nllfunt), x=x0, v=v0, learn_rate=alpha, decay=decay, iters=N_iter, theta=theta, rs=rs)) entropy = np.log(decay) * D * N_iter rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale aed_optimized_points.append( adaptive_entropic_descent(grad(nllfunt), x=x0, v=v0, init_learn_rate=alpha, init_log_decay=np.log(decay), meta_learn_rate=meta_alpha, meta_decay=meta_decay, iters=N_iter)) rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale asgd_optimized_points.append( adaptive_sgd(grad(nllfunt), x=x0, v=v0, init_learn_rate=alpha, init_log_decay=np.log(decay), meta_learn_rate=meta_alpha, meta_decay=meta_decay, iters=N_iter)) return sgd_optimized_points, ed_optimized_points, aed_optimized_points, asgd_optimized_points, entropy
def run():
    (train_images, train_labels), \
        (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes, L2_per_dpt)
    N_param = len(parser.vect)
    print "Running experiment..."
    results = defaultdict(list)
    for i in xrange(N_samples):
        x_init_scale = np.full(N_param, init_scale)

        def indexed_loss_fun(w, i_iter):
            rs = RandomState((seed, i, i_iter))
            idxs = rs.randint(N_train, size=batch_size)
            return nllfun(w, train_images[idxs], train_labels[idxs]) * N_train

        gradfun = grad(indexed_loss_fun)

        def callback(x, t, v, entropy):
            results[("entropy", i)].append(entropy / N_train)
            results[("v_norm", i)].append(norm(v) / np.sqrt(N_param))
            results[("minibatch_likelihood", i)].append(-indexed_loss_fun(x, t))
            if t % thin != 0 and t != N_iter and t != 0:
                return
            results[("iterations", i)].append(t)
            results[("train_likelihood", i)].append(-nllfun(x, train_images, train_labels))
            results[("tests_likelihood", i)].append(-nllfun(x, tests_images, tests_labels))
            results[("tests_error", i)].append(frac_err(x, tests_images, tests_labels))
            print "Iteration {0:5} Train likelihood {1:2.4f} Test likelihood {2:2.4f}" \
                  " Test Err {3:2.4f}".format(t, results[("train_likelihood", i)][-1],
                                              results[("tests_likelihood", i)][-1],
                                              results[("tests_error", i)][-1])

        rs = RandomState((seed, i))
        entropic_descent2(gradfun, callback=callback, x_scale=x_init_scale,
                          epsilon=epsilon, gamma=gamma, alpha=alpha,
                          annealing_schedule=annealing_schedule, rs=rs)
    return results
def run():
    x_init_scale = np.full(D, init_scale)
    print "Running experiment..."
    results = defaultdict(list)
    for i in xrange(N_samples):

        def callback(x, t, entropy):
            if i < N_samples_trails:
                results[("trail_x", i)].append(x.copy())
                results[("entropy", i)].append(entropy)
                results[("likelihood", i)].append(-nllfun(x))
            if t in snapshot_times:
                results[("all_x", t)].append(x.copy())

        rs = RandomState((seed, i))
        x, entropy = sgd_entropic(gradfun, x_scale=x_init_scale, N_iter=N_iter,
                                  learn_rate=alpha, rs=rs, callback=callback,
                                  approx=False, mu=init_mu)
        callback(x, N_iter, entropy)
    return results
def run(): print "Running experiment..." results = defaultdict(list) for i in xrange(N_samples): def callback(x, t, g, v, entropy): results[("x", i, t)] = x.copy() # Replace this with a loop over kwargs? results[("entropy", i, t)] = entropy results[("velocity", i, t)] = v results[("likelihood", i, t)] = -nllfun(x) rs = RandomState((seed, i)) x, entropy = entropic_descent2(gradfun, callback=callback, x_scale=x_init_scale, epsilon=epsilon, gamma=gamma, iters=N_iter, rs=rs) results[("x", i, N_iter)] = x results[("entropy", i, N_iter)] = x return results
def run():
    # annealing_schedule = np.linspace(0, 1, N_iter)
    annealing_schedule = np.concatenate(
        (np.zeros(N_iter / 3),
         np.linspace(0, 1, N_iter / 3),
         np.ones(N_iter / 3)))
    print "Running experiment..."
    results = defaultdict(list)
    for i in xrange(N_samples):

        def callback(x, t, v, entropy):
            # Replace this with a loop over kwargs?
            results[("x", i)].append(x.copy())
            results[("entropy", i)].append(entropy)
            results[("velocity", i)].append(v)
            results[("likelihood", i)].append(-nllfun(x))

        rs = RandomState((seed, i))
        x, entropy = entropic_descent2(gradfun, callback=callback,
                                       x_scale=x_init_scale, epsilon=epsilon,
                                       gamma=gamma, alpha=alpha,
                                       annealing_schedule=annealing_schedule,
                                       rs=rs)
    return results
def run():
    x_init_scale = np.full(D, init_scale)
    annealing_schedule = np.linspace(0, 1, N_iter)
    print "Running experiment..."
    results = defaultdict(list)
    for i in xrange(N_samples):

        def callback(x, t, v, entropy):
            if i < N_samples_trails:
                results[("trail_x", i)].append(x.copy())
                results[("trail_v", i)].append(v.copy())
                results[("entropy", i)].append(entropy)
                results[("likelihood", i)].append(-nllfun(x))
            if t in snapshot_times:
                results[("all_x", t)].append(x.copy())
                results[("all_v", t)].append(v.copy())

        rs = RandomState((seed, i))
        entropic_descent2(gradfun, callback=callback, x_scale=x_init_scale,
                          epsilon=epsilon, gamma=gamma, alpha=alpha,
                          annealing_schedule=annealing_schedule, rs=rs)
    return results
def run():
    train_inputs, train_targets, \
        tests_inputs, tests_targets, unscale_y = load_boston_housing(train_frac)
    N_train = train_inputs.shape[0]
    batch_size = N_train
    alpha = alpha_un / N_train
    parser, pred_fun, nllfun, rmse = make_regression_nn_funs(layer_sizes)
    N_param = len(parser.vect)

    def indexed_loss_fun(w, i_iter):
        rs = RandomState((seed, i, i_iter))
        idxs = rs.randint(N_train, size=batch_size)
        nll = nllfun(w, train_inputs[idxs], train_targets[idxs]) * N_train
        return nll

    gradfun = grad(indexed_loss_fun)

    def callback(x, t, entropy):
        results["entropy_per_dpt"].append(entropy / N_train)
        results["x_rms"].append(np.sqrt(np.mean(x * x)))
        results["minibatch_likelihood"].append(-indexed_loss_fun(x, t))
        results["log_prior_per_dpt"].append(-neg_log_prior(x) / N_train)
        if t % thin != 0 and t != N_iter and t != 0:
            return
        results["iterations"].append(t)
        results["train_likelihood"].append(-nllfun(x, train_inputs, train_targets))
        results["tests_likelihood"].append(-nllfun(x, tests_inputs, tests_targets))
        results["train_rmse"].append(unscale_y(rmse(x, train_inputs, train_targets)))
        results["tests_rmse"].append(unscale_y(rmse(x, tests_inputs, tests_targets)))
        results["marg_likelihood"].append(
            estimate_marginal_likelihood(results["train_likelihood"][-1],
                                         results["entropy_per_dpt"][-1]))
        print "Iteration {0:5} Train lik {1:2.4f} Test lik {2:2.4f}" \
              " Marg lik {3:2.4f} Test RMSE {4:2.4f}".format(
                  t, results["train_likelihood"][-1],
                  results["tests_likelihood"][-1],
                  results["marg_likelihood"][-1],
                  results["tests_rmse"][-1])

    all_results = []
    for i in xrange(N_samples):
        results = defaultdict(list)
        rs = RandomState((seed, i))
        sgd_entropic_damped(gradfun, np.full(N_param, init_scale), N_iter,
                            alpha, rs, callback, width=1)
        all_results.append(results)
    return all_results
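# estimate_marginal_likelihood is not defined in this file. Given how it is
# called (training likelihood plus tracked entropy), a plausible minimal
# version is the variational lower bound log p(D) >= E_q[log p(D | x)] + H[q].
# Treat the sketch below as an assumption, not the repo's actual definition:
def estimate_marginal_likelihood(train_likelihood, entropy_per_dpt):
    # ELBO-style estimate: expected log likelihood plus the entropy of the
    # distribution implicitly tracked by the entropic SGD run.
    return train_likelihood + entropy_per_dpt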
def run():
    (train_images, train_labels), \
        (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes)
    N_param = len(parser.vect)
    print "Number of parameters in model: ", N_param

    def indexed_loss_fun(w, i_iter):
        rs = RandomState((seed, i_iter))
        idxs = rs.randint(N_train, size=batch_size)
        nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
        nlp = neg_log_prior(w)
        return nll + nlp

    gradfun = grad(indexed_loss_fun)

    def callback(x, t, entropy):
        results["entropy_per_dpt"].append(entropy / N_train)
        results["x_rms"].append(np.sqrt(np.mean(x * x)))
        results["minibatch_likelihood"].append(-indexed_loss_fun(x, t))
        results["log_prior_per_dpt"].append(-neg_log_prior(x) / N_train)
        if t % thin != 0 and t != N_iter and t != 0:
            return
        results["iterations"].append(t)
        results["train_likelihood"].append(-nllfun(x, train_images, train_labels))
        results["tests_likelihood"].append(-nllfun(x, tests_images, tests_labels))
        results["tests_error"].append(frac_err(x, tests_images, tests_labels))
        results["marg_likelihood"].append(
            estimate_marginal_likelihood(results["train_likelihood"][-1],
                                         results["entropy_per_dpt"][-1]))
        print "Iteration {0:5} Train lik {1:2.4f} Test lik {2:2.4f}" \
              " Marg lik {3:2.4f} Test err {4:2.4f}".format(
                  t, results["train_likelihood"][-1],
                  results["tests_likelihood"][-1],
                  results["marg_likelihood"][-1],
                  results["tests_error"][-1])

    all_results = []
    preds = defaultdict(list)
    for width in widths:
        results = defaultdict(list)
        rs = RandomState(seed)
        sgd_entropic_damped(gradfun, np.full(N_param, init_scale), N_iter,
                            alpha, rs, callback, width=width)
        all_results.append(results)
    return all_results
def run(): print "Running experiment..." results = defaultdict(list) for i in xrange(N_samples): print i, def callback(x, t, v, entropy, log_decay): results[("x", i)].append(x.copy()) # Replace this with a loop over kwargs? results[("entropy", i)].append(entropy) results[("velocity", i)].append(v) results[("log decay", i)].append(log_decay[0]) results[("likelihood", i)].append(-nllfun(x)) rs = RandomState((seed, i)) x0 = rs.randn(D) * init_scale v0 = rs.randn(D) # TODO: account for entropy of init. init_entropy = 0.5 * D * (1 + np.log(2*np.pi))\ + np.sum(np.log(init_scale)) aed3(gradfun, callback=callback, x=x0, v=v0, learn_rate=epsilon, iters=N_iter, init_log_decay=init_log_decay, decay_learn_rate=decay_learn_rate, entropy=init_entropy) return results
def run():
    rs = RandomState(0)
    grad_posterior = grad(log_posterior)
    bin_counts = {i_iter: np.zeros(N_bins) for i_iter in save_iters}
    h = 1.0 / bin_width / N_samp
    for i_samp in xrange(N_samp):
        x = init_samp(rs)
        for i_iter in xrange(N_iters):
            if i_iter in bin_counts:
                bin_counts[i_iter][bin_idx(x)] += h
            x += alpha * grad_posterior(x)
    return bin_counts
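# This run() leans on module-level helpers (log_posterior, init_samp, bin_idx)
# and constants that aren't shown. A minimal self-contained setup consistent
# with how they are called, for a 1-D standard-normal posterior; every name
# and value here is illustrative, not taken from the repo:
import numpy as np
from numpy.random import RandomState
from autograd import grad   # assuming the repo's autograd-based grad

N_samp, N_iters, N_bins = 1000, 100, 40
save_iters = [0, 10, 99]
alpha = 0.1
x_min, x_max = -4.0, 4.0
bin_width = (x_max - x_min) / N_bins

def log_posterior(x):
    return -0.5 * x ** 2      # unnormalized standard normal

def init_samp(rs):
    return 2.0 * rs.randn()   # broad Gaussian initialization

def bin_idx(x):
    return int(np.clip((x - x_min) / bin_width, 0, N_bins - 1))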
def run(): print "Running experiment..." results = defaultdict(list) for i in xrange(N_samples): print i, def callback(**kwargs): for k, v in kwargs.iteritems(): results[(k, i)].append(copy(v)) results[("likelihood", i)].append(-nllfun(kwargs['x'])) results[("x_minus_mu_sq", i)].append((kwargs['x'] - mu)**2) rs = RandomState((seed, i)) sgd_entropic(gradfun, np.full(D, init_scale), N_iter, alpha, rs, callback) return results
def run():
    rs = RandomState((seed, "top"))
    X, V = initialize(rs)
    x0 = X.val
    forward_path = [X.val[0]]
    densities = []
    for i_iter in range(all_N_iter[-1] + 1):
        X, V = forward_iter(X, V)
        forward_path.append(X.val[0])
        if i_iter in all_N_iter:
            print i_iter
            paths = [random_unwind(X, V, rs, i_iter + 1)
                     for i_back in range(N_back)]
            densities.append(np.mean([p[0] for p in paths]))
            if i_iter == N_iter_plot:
                backward_paths = paths[:N_back_keep]
    return forward_path, backward_paths, densities
def run(): print "Running experiment..." results = defaultdict(list) for i in xrange(N_samples): print i, def callback(x, t, v, entropy, log_decay): results[("x", i)].append( x.copy()) # Replace this with a loop over kwargs? results[("entropy", i)].append(entropy) results[("velocity", i)].append(v) results[("log decay", i)].append(log_decay[0]) results[("likelihood", i)].append(-nllfun(x)) rs = RandomState((seed, i)) aed3_anneal(gradfun, x_scale=x_init_scale, callback=callback, learn_rate=epsilon, init_log_decay=init_log_decay, decay_learn_rate=decay_learn_rate, rs=rs, annealing_schedule=annealing_schedule) return results
def indexed_loss_fun(w, i_iter):
    rs = RandomState((seed, i_iter))
    idxs = rs.randint(N_train, size=batch_size)
    nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
    # nlp = neg_log_prior(w)
    return nll  # + nlp
def indexed_loss_fun(w, i_iter):
    rs = RandomState((seed, i, i_iter))
    idxs = rs.randint(N_train, size=batch_size)
    nll = nllfun(w, train_inputs[idxs], train_targets[idxs]) * N_train
    nlp = neg_log_prior(w)
    return nll + nlp
def test_exact_log_det():
    D = 100
    rs = RandomState(1)
    mat = np.eye(D) - 0.1 * np.diag(rs.rand(D))
    mvp = lambda v: np.dot(mat, v)
    # Exact float equality between two different computations is fragile;
    # allow for rounding error instead.
    assert np.isclose(exact_log_det(mvp, D), np.log(np.linalg.det(mat)))
def run():
    (train_images, train_labels), \
        (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes)
    N_param = len(parser.vect)

    def indexed_loss_fun(w, i_iter):
        rs = RandomState((seed, i, i_iter))
        idxs = rs.randint(N_train, size=batch_size)
        nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
        nlp = neg_log_prior(w)
        return nll + nlp

    gradfun = grad(indexed_loss_fun)

    def callback(x, t, entropy):
        results["entropy_per_dpt"].append(entropy / N_train)
        results["x_rms"].append(np.sqrt(np.mean(x * x)))
        results["minibatch_likelihood"].append(-indexed_loss_fun(x, t))
        results["log_prior_per_dpt"].append(-neg_log_prior(x) / N_train)
        if t % thin != 0 and t != N_iter and t != 0:
            return
        results["iterations"].append(t)
        results["train_likelihood"].append(-nllfun(x, train_images, train_labels))
        results["tests_likelihood"].append(-nllfun(x, tests_images, tests_labels))
        results["tests_error"].append(frac_err(x, tests_images, tests_labels))
        results["marg_likelihood"].append(
            estimate_marginal_likelihood(results["train_likelihood"][-1],
                                         results["entropy_per_dpt"][-1]))
        preds[i].append(pred_fun(x, tests_images))
        print "Iteration {0:5} Train lik {1:2.4f} Test lik {2:2.4f}" \
              " Marg lik {3:2.4f} Test err {4:2.4f}".format(
                  t, results["train_likelihood"][-1],
                  results["tests_likelihood"][-1],
                  results["marg_likelihood"][-1],
                  results["tests_error"][-1])

    all_results = []
    preds = defaultdict(list)
    for i in xrange(N_samples):
        results = defaultdict(list)
        rs = RandomState((seed, i))
        sgd_entropic_damped(gradfun, np.full(N_param, init_scale), N_iter,
                            alpha, rs, callback, width=100)
        all_results.append(results)

    # Make ensemble prediction by averaging predicted class-conditional
    # probabilities.
    ensemble_frac_err = []
    ensemble_loglike = []
    for t in xrange(len(all_results[0]["iterations"])):
        cur_probs = [preds[i][t] for i in xrange(N_samples)]
        avg_probs_unn = np.mean(np.exp(cur_probs), axis=0)
        avg_probs = avg_probs_unn / np.sum(avg_probs_unn, axis=1, keepdims=True)
        ensemble_preds = np.argmax(avg_probs, axis=1)
        ensemble_frac_err.append(
            np.mean(np.argmax(tests_labels, axis=1) != ensemble_preds))
        ensemble_loglike.append(
            np.sum(np.log(avg_probs) * tests_labels) / tests_images.shape[0])
    return all_results, ensemble_frac_err, ensemble_loglike
def indexed_loss_fun(w, i_iter):
    rs = RandomState((seed, i, i_iter))
    idxs = rs.randint(N_train, size=batch_size)
    return nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
def indexed_loss_fun(w, i_iter):
    rs = RandomState((seed, i, i_iter))
    idxs = rs.randint(N_train, size=batch_size)
    nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
    nlp = neg_log_prior(w)
    return nll + nlp
def indexed_loss_fun(w, i_iter):
    rs = RandomState((seed, i, i_iter))
    idxs = rs.randint(N_train, size=batch_size)
    return nllfun(w, train_images[idxs], train_labels[idxs])
def indexed_loss_fun(w, i_iter):
    rs = RandomState((seed, i, i_iter))
    idxs = rs.randint(N_train, size=batch_size)
    nll = nllfun(w, train_inputs[idxs], train_targets[idxs]) * N_train
    return nll
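# A note on the seeding pattern used throughout: RandomState((seed, i, i_iter))
# derives an independent, reproducible stream per (sample, iteration) pair, so
# replaying an SGD step resamples the exact same minibatch. If RandomState is
# numpy's (the (seed, "top") seed elsewhere suggests the repo may wrap it to
# hash arbitrary tuples), a sequence of ints is a valid seed; illustrative
# values below:
from numpy.random import RandomState

idxs_a = RandomState((0, 3, 7)).randint(100, size=5)
idxs_b = RandomState((0, 3, 7)).randint(100, size=5)
assert (idxs_a == idxs_b).all()   # same (seed, i, i_iter) -> same minibatch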