def run(): print "Running experiment..." sgd_optimized_points = [] ed_optimized_points = [] for i in xrange(N_samples): rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale sgd_optimized_points.append( sgd(grad(nllfunt), x=x0, v=v0, learn_rate=alpha, decay=decay, iters=N_iter)) rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale ed_optimized_points.append( entropic_descent(grad(nllfunt), x=x0, v=v0, learn_rate=alpha, decay=decay, iters=N_iter, theta=theta, rs=rs)) entropy = np.log(decay) * D * N_iter return sgd_optimized_points, ed_optimized_points, entropy
def run(): print "Running experiment..." sgd_optimized_points = [] ed_optimized_points = [] aed_optimized_points = [] asgd_optimized_points = [] for i in xrange(N_samples): rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale sgd_optimized_points.append( sgd(grad(nllfunt), x=x0, v=v0, learn_rate=alpha, decay=decay, iters=N_iter)) rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale ed_optimized_points.append( entropic_descent(grad(nllfunt), x=x0, v=v0, learn_rate=alpha, decay=decay, iters=N_iter, theta=theta, rs=rs)) entropy = np.log(decay) * D * N_iter rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale aed_optimized_points.append( adaptive_entropic_descent(grad(nllfunt), x=x0, v=v0, init_learn_rate=alpha, init_log_decay=np.log(decay), meta_learn_rate=meta_alpha, meta_decay=meta_decay, iters=N_iter)) rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale asgd_optimized_points.append( adaptive_sgd(grad(nllfunt), x=x0, v=v0, init_learn_rate=alpha, init_log_decay=np.log(decay), meta_learn_rate=meta_alpha, meta_decay=meta_decay, iters=N_iter)) return sgd_optimized_points, ed_optimized_points, aed_optimized_points, asgd_optimized_points, entropy
def run(): print "Running experiment..." results = defaultdict(list) for i in xrange(N_samples): print i, def callback(x, t, v, entropy, log_decay): results[("x", i)].append( x.copy()) # Replace this with a loop over kwargs? results[("entropy", i)].append(entropy) results[("velocity", i)].append(v) results[("log decay", i)].append(log_decay[0]) results[("likelihood", i)].append(-nllfun(x)) rs = RandomState((seed, i)) x0 = rs.randn(D) * init_scale v0 = rs.randn(D) # TODO: account for entropy of init. init_entropy = 0.5 * D * (1 + np.log(2*np.pi))\ + np.sum(np.log(init_scale)) aed3(gradfun, callback=callback, x=x0, v=v0, learn_rate=epsilon, iters=N_iter, init_log_decay=init_log_decay, decay_learn_rate=decay_learn_rate, entropy=init_entropy) return results
def run(): print "Running experiment..." sgd_optimized_points = [] for i in xrange(N_samples): rs = RandomState((seed, i)) x0 = rs.randn(D) * x_init_scale v0 = rs.randn(D) * v_init_scale sgd_optimized_points.append( sgd(grad(nllfunt), x=x0, v=v0, learn_rate=alpha, decay=decay, iters=N_iter)) return sgd_optimized_points, entropy
def run():
    """Train a neural net with aed3 from N_samples random starts.

    Loads a fixed train/test subset, then for each sample runs aed3 on a
    minibatch negative-log-posterior, logging per-iteration diagnostics
    (thinned for the expensive full-dataset evaluations) into `results`,
    keyed by (metric_name, sample_index).
    """
    # Load data and build the network functions once, shared by all samples.
    (train_images, train_labels),\
        (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes)
    N_param = len(parser.vect)
    print "Running experiment..."
    results = defaultdict(list)
    for i in xrange(N_samples):
        def indexed_loss_fun(w, i_iter):
            # Deterministic minibatch: seeding with (seed, sample, iteration)
            # makes the minibatch sequence reproducible per run.
            rs = RandomState((seed, i, i_iter))
            idxs = rs.randint(N_train, size=batch_size)
            # Scale the minibatch NLL up to the full dataset, then add the
            # (unscaled) negative log prior to form the negative log posterior.
            nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
            nlp = neg_log_prior(w)
            return nll + nlp
        gradfun = grad(indexed_loss_fun)

        # Closes over i and results; called by aed3 once per iteration.
        def callback(x, t, v, entropy):
            # Cheap per-iteration diagnostics, normalized per data point /
            # per parameter.
            results[("entropy", i)].append(entropy / N_train)
            results[("v_norm", i)].append(norm(v) / np.sqrt(N_param))
            results[("minibatch_likelihood", i)].append(-indexed_loss_fun(x, t))
            results[("log_prior_per_dpt", i)].append(-neg_log_prior(x) / N_train)
            # Thin the expensive full-dataset evaluations, but always keep
            # the first and last iterations.
            if t % thin != 0 and t != N_iter and t != 0:
                return
            results[('iterations', i)].append(t)
            results[("train_likelihood", i)].append(-nllfun(x, train_images, train_labels))
            results[("tests_likelihood", i)].append(-nllfun(x, tests_images, tests_labels))
            results[("tests_error", i)].append(frac_err(x, tests_images, tests_labels))
            # Adjacent string literals concatenate before .format is applied.
            print "Iteration {0:5} Train likelihood {1:2.4f} Test likelihood {2:2.4f}" \
                  " Test Err {3:2.4f}".format(t, results[("train_likelihood", i)][-1],
                                              results[("tests_likelihood", i)][-1],
                                              results[("tests_error", i)][-1])

        # Per-sample random initialization of parameters and velocity.
        rs = RandomState((seed, i))
        x0 = rs.randn(N_param) * init_scale
        v0 = rs.randn(N_param)
        # TODO: account for entropy of init.
        #entropy = 0.5 * D * (1 + np.log(2*np.pi)) + np.sum(np.log(x_scale)) + 0.5 * (D - norm(v) **2)
        aed3(gradfun, callback=callback, x=x0, v=v0, learn_rate=epsilon,
             iters=N_iter, init_log_decay=init_log_decay,
             decay_learn_rate=decay_learn_rate)
    return results
def run():
    """Train a neural net with aed3 from N_samples random starts.

    Loads a fixed train/test subset, then for each sample runs aed3 on a
    minibatch negative-log-posterior, logging per-iteration diagnostics
    (thinned for the expensive full-dataset evaluations) into `results`,
    keyed by (metric_name, sample_index).
    """
    # Load data and build the network functions once, shared by all samples.
    (train_images, train_labels), (tests_images, tests_labels) = load_data_subset(N_train, N_tests)
    parser, pred_fun, nllfun, frac_err = make_nn_funs(layer_sizes)
    N_param = len(parser.vect)
    print "Running experiment..."
    results = defaultdict(list)
    for i in xrange(N_samples):
        def indexed_loss_fun(w, i_iter):
            # Deterministic minibatch: seeding with (seed, sample, iteration)
            # makes the minibatch sequence reproducible per run.
            rs = RandomState((seed, i, i_iter))
            idxs = rs.randint(N_train, size=batch_size)
            # Scale the minibatch NLL up to the full dataset, then add the
            # (unscaled) negative log prior to form the negative log posterior.
            nll = nllfun(w, train_images[idxs], train_labels[idxs]) * N_train
            nlp = neg_log_prior(w)
            return nll + nlp
        gradfun = grad(indexed_loss_fun)

        # Closes over i and results; called by aed3 once per iteration.
        def callback(x, t, v, entropy):
            # Cheap per-iteration diagnostics, normalized per data point /
            # per parameter.
            results[("entropy", i)].append(entropy / N_train)
            results[("v_norm", i)].append(norm(v) / np.sqrt(N_param))
            results[("minibatch_likelihood", i)].append(-indexed_loss_fun(x, t))
            results[("log_prior_per_dpt", i)].append(-neg_log_prior(x) / N_train)
            # Thin the expensive full-dataset evaluations, but always keep
            # the first and last iterations.
            if t % thin != 0 and t != N_iter and t != 0:
                return
            results[("iterations", i)].append(t)
            results[("train_likelihood", i)].append(-nllfun(x, train_images, train_labels))
            results[("tests_likelihood", i)].append(-nllfun(x, tests_images, tests_labels))
            results[("tests_error", i)].append(frac_err(x, tests_images, tests_labels))
            # Adjacent string literals concatenate before .format is applied.
            print "Iteration {0:5} Train likelihood {1:2.4f} Test likelihood {2:2.4f}" " Test Err {3:2.4f}".format(
                t,
                results[("train_likelihood", i)][-1],
                results[("tests_likelihood", i)][-1],
                results[("tests_error", i)][-1],
            )

        # Per-sample random initialization of parameters and velocity.
        rs = RandomState((seed, i))
        x0 = rs.randn(N_param) * init_scale
        v0 = rs.randn(N_param)
        # TODO: account for entropy of init.
        # entropy = 0.5 * D * (1 + np.log(2*np.pi)) + np.sum(np.log(x_scale)) + 0.5 * (D - norm(v) **2)
        aed3(
            gradfun,
            callback=callback,
            x=x0,
            v=v0,
            learn_rate=epsilon,
            iters=N_iter,
            init_log_decay=init_log_decay,
            decay_learn_rate=decay_learn_rate,
        )
    return results
def run(): print "Running experiment..." results = defaultdict(list) for i in xrange(N_samples): print i, def callback(x, t, v, entropy, log_decay): results[("x", i)].append(x.copy()) # Replace this with a loop over kwargs? results[("entropy", i)].append(entropy) results[("velocity", i)].append(v) results[("log decay", i)].append(log_decay[0]) results[("likelihood", i)].append(-nllfun(x)) rs = RandomState((seed, i)) x0 = rs.randn(D) * init_scale v0 = rs.randn(D) # TODO: account for entropy of init. init_entropy = 0.5 * D * (1 + np.log(2*np.pi))\ + np.sum(np.log(init_scale)) aed3(gradfun, callback=callback, x=x0, v=v0, learn_rate=epsilon, iters=N_iter, init_log_decay=init_log_decay, decay_learn_rate=decay_learn_rate, entropy=init_entropy) return results