def test_svi_step_guide_uses_grad(enumerate1): data = torch.tensor([0., 1., 3.]) @poutine.broadcast def model(): scale = pyro.param("scale") loc = pyro.sample("loc", dist.Normal(0., 10.)) with pyro.iarange("data", len(data)): pyro.sample("obs", dist.Normal(loc, scale), obs=data) pyro.sample("b", dist.Bernoulli(0.5)) @config_enumerate(default=enumerate1) def guide(): p = pyro.param("p", torch.tensor(0.5), constraint=constraints.unit_interval) scale = pyro.param("scale", torch.tensor(1.0), constraint=constraints.positive) var = pyro.param("var", torch.tensor(1.0), constraint=constraints.positive) x = torch.tensor(0., requires_grad=True) prior = dist.Normal(0., 10.).log_prob(x) likelihood = dist.Normal(x, scale).log_prob(data).sum() loss = -(prior + likelihood) g = grad(loss, [x], create_graph=True)[0] H = grad(g, [x], create_graph=True)[0] loc = x.detach() - g / H # newton step pyro.sample("loc", dist.Normal(loc, var)) pyro.sample("b", dist.Bernoulli(p)) elbo = TraceEnum_ELBO(max_iarange_nesting=1, strict_enumeration_warning=any([enumerate1])) inference = SVI(model, guide, pyro.optim.Adam({}), elbo) inference.step()
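# Why a single Newton step is enough in the guide above: for this Normal prior and Normal
# likelihood the negative log joint is exactly quadratic in x, so x - g/H lands on the
# conditional posterior mean. A small standalone check of that claim (a sketch, assuming
# scale = 1; not part of the original test):
import torch
from torch.autograd import grad
import pyro.distributions as dist

data = torch.tensor([0., 1., 3.])
scale = torch.tensor(1.0)
x = torch.tensor(0., requires_grad=True)
loss = -(dist.Normal(0., 10.).log_prob(x) + dist.Normal(x, scale).log_prob(data).sum())
g = grad(loss, [x], create_graph=True)[0]
H = grad(g, [x], create_graph=True)[0]
newton_loc = (x.detach() - g / H).detach()
# closed-form posterior mean for a Normal(0, 10) prior: (sum(data)/scale^2) / (1/10^2 + n/scale^2)
closed_form = (data.sum() / scale ** 2) / (1. / 10. ** 2 + len(data) / scale ** 2)
assert torch.allclose(newton_loc, closed_form)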
def test_dirichlet_bernoulli(Elbo, vectorized): pyro.clear_param_store() data = torch.tensor([1.0] * 6 + [0.0] * 4) def model1(data): concentration0 = torch.tensor([10.0, 10.0]) f = pyro.sample("latent_fairness", dist.Dirichlet(concentration0))[1] for i in pyro.irange("irange", len(data)): pyro.sample("obs_{}".format(i), dist.Bernoulli(f), obs=data[i]) def model2(data): concentration0 = torch.tensor([10.0, 10.0]) f = pyro.sample("latent_fairness", dist.Dirichlet(concentration0))[1] pyro.sample("obs", dist.Bernoulli(f).expand_by(data.shape).independent(1), obs=data) model = model2 if vectorized else model1 def guide(data): concentration_q = pyro.param("concentration_q", torch.tensor([15.0, 15.0]), constraint=constraints.positive) pyro.sample("latent_fairness", dist.Dirichlet(concentration_q)) elbo = Elbo(num_particles=7, strict_enumeration_warning=False) optim = Adam({"lr": 0.0005, "betas": (0.90, 0.999)}) svi = SVI(model, guide, optim, elbo) for step in range(40): svi.step(data)
def assert_ok(model, guide, elbo): """ Assert that inference works without warnings or errors. """ pyro.clear_param_store() inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo) inference.step()
def test_dynamic_lr(scheduler, num_steps): pyro.clear_param_store() def model(): sample = pyro.sample('latent', Normal(torch.tensor(0.), torch.tensor(0.3))) return pyro.sample('obs', Normal(sample, torch.tensor(0.2)), obs=torch.tensor(0.1)) def guide(): loc = pyro.param('loc', torch.tensor(0.)) scale = pyro.param('scale', torch.tensor(0.5)) pyro.sample('latent', Normal(loc, scale)) svi = SVI(model, guide, scheduler, loss=TraceGraph_ELBO()) for epoch in range(2): scheduler.set_epoch(epoch) for _ in range(num_steps): svi.step() if epoch == 1: loc = pyro.param('loc') scale = pyro.param('scale') opt = scheduler.optim_objs[loc].optimizer assert opt.state_dict()['param_groups'][0]['lr'] == 0.02 assert opt.state_dict()['param_groups'][0]['initial_lr'] == 0.01 opt = scheduler.optim_objs[scale].optimizer assert opt.state_dict()['param_groups'][0]['lr'] == 0.02 assert opt.state_dict()['param_groups'][0]['initial_lr'] == 0.01
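# For context, a sketch of how the `scheduler` argument exercised above is typically built in
# Pyro: a pyro.optim learning-rate-scheduler wrapper takes the underlying torch optimizer class
# plus its arguments in one dict. The exact fixture used by this test is not shown here;
# gamma=2.0 and step_size=1 below are assumptions chosen to match the asserted jump from
# initial_lr=0.01 to lr=0.02 after one epoch.
import torch
import pyro.optim

scheduler = pyro.optim.StepLR({
    "optimizer": torch.optim.Adam,   # underlying torch optimizer class
    "optim_args": {"lr": 0.01},      # per-parameter optimizer arguments
    "step_size": 1,                  # apply the multiplier every epoch
    "gamma": 2.0,                    # multiply the learning rate by 2 at each scheduler step
})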
def test_quantiles(auto_class, Elbo): def model(): pyro.sample("x", dist.Normal(0.0, 1.0)) pyro.sample("y", dist.LogNormal(0.0, 1.0)) pyro.sample("z", dist.Beta(2.0, 2.0)) guide = auto_class(model) infer = SVI(model, guide, Adam({'lr': 0.01}), Elbo(strict_enumeration_warning=False)) for _ in range(100): infer.step() quantiles = guide.quantiles([0.1, 0.5, 0.9]) median = guide.median() for name in ["x", "y", "z"]: assert_equal(median[name], quantiles[name][1]) quantiles = {name: [v.item() for v in value] for name, value in quantiles.items()} assert -3.0 < quantiles["x"][0] assert quantiles["x"][0] + 1.0 < quantiles["x"][1] assert quantiles["x"][1] + 1.0 < quantiles["x"][2] assert quantiles["x"][2] < 3.0 assert 0.01 < quantiles["y"][0] assert quantiles["y"][0] * 2.0 < quantiles["y"][1] assert quantiles["y"][1] * 2.0 < quantiles["y"][2] assert quantiles["y"][2] < 100.0 assert 0.01 < quantiles["z"][0] assert quantiles["z"][0] + 0.1 < quantiles["z"][1] assert quantiles["z"][1] + 0.1 < quantiles["z"][2] assert quantiles["z"][2] < 0.99
def assert_error(model, guide, elbo): """ Assert that inference fails with an error. """ pyro.clear_param_store() inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo) with pytest.raises((NotImplementedError, UserWarning, KeyError, ValueError, RuntimeError)): inference.step()
def test_svi_step_smoke(model, guide, enum_discrete, trace_graph): pyro.clear_param_store() data = Variable(torch.Tensor([0, 1, 9])) optimizer = pyro.optim.Adam({"lr": .001}) inference = SVI(model, guide, optimizer, loss="ELBO", trace_graph=trace_graph, enum_discrete=enum_discrete) with xfail_if_not_implemented(): inference.step(data)
def do_elbo_test(self, reparameterized, n_steps, lr, prec, beta1, difficulty=1.0, model_permutation=False): n_repa_nodes = torch.sum(self.which_nodes_reparam) if not reparameterized \ else len(self.q_topo_sort) logger.info((" - - - DO GAUSSIAN %d-LAYERED PYRAMID ELBO TEST " + "(with a total of %d RVs) [reparameterized=%s; %d/%d; perm=%s] - - -") % (self.N, (2 ** self.N) - 1, reparameterized, n_repa_nodes, len(self.q_topo_sort), model_permutation)) pyro.clear_param_store() # check graph structure is as expected but only for N=2 if self.N == 2: guide_trace = pyro.poutine.trace(self.guide, graph_type="dense").get_trace(reparameterized=reparameterized, model_permutation=model_permutation, difficulty=difficulty) expected_nodes = set(['log_sig_1R', 'kappa_1_1L', '_INPUT', 'constant_term_loc_latent_1R', '_RETURN', 'loc_latent_1R', 'loc_latent_1', 'constant_term_loc_latent_1', 'loc_latent_1L', 'constant_term_loc_latent_1L', 'log_sig_1L', 'kappa_1_1R', 'kappa_1R_1L', 'log_sig_1']) expected_edges = set([('loc_latent_1R', 'loc_latent_1'), ('loc_latent_1L', 'loc_latent_1R'), ('loc_latent_1L', 'loc_latent_1')]) assert expected_nodes == set(guide_trace.nodes) assert expected_edges == set(guide_trace.edges) adam = optim.Adam({"lr": lr, "betas": (beta1, 0.999)}) svi = SVI(self.model, self.guide, adam, loss=TraceGraph_ELBO()) for step in range(n_steps): t0 = time.time() svi.step(reparameterized=reparameterized, model_permutation=model_permutation, difficulty=difficulty) if step % 5000 == 0 or step == n_steps - 1: log_sig_errors = [] for node in self.target_lambdas: target_log_sig = -0.5 * torch.log(self.target_lambdas[node]) log_sig_error = param_mse('log_sig_' + node, target_log_sig) log_sig_errors.append(log_sig_error) max_log_sig_error = np.max(log_sig_errors) min_log_sig_error = np.min(log_sig_errors) mean_log_sig_error = np.mean(log_sig_errors) leftmost_node = self.q_topo_sort[0] leftmost_constant_error = param_mse('constant_term_' + leftmost_node, self.target_leftmost_constant) almost_leftmost_constant_error = param_mse('constant_term_' + leftmost_node[:-1] + 'R', self.target_almost_leftmost_constant) logger.debug("[mean function constant errors (partial)] %.4f %.4f" % (leftmost_constant_error, almost_leftmost_constant_error)) logger.debug("[min/mean/max log(scale) errors] %.4f %.4f %.4f" % (min_log_sig_error, mean_log_sig_error, max_log_sig_error)) logger.debug("[step time = %.3f; N = %d; step = %d]\n" % (time.time() - t0, self.N, step)) assert_equal(0.0, max_log_sig_error, prec=prec) assert_equal(0.0, leftmost_constant_error, prec=prec) assert_equal(0.0, almost_leftmost_constant_error, prec=prec)
def test_svi_step_smoke(model, guide, enumerate1): pyro.clear_param_store() data = torch.tensor([0.0, 1.0, 9.0]) guide = config_enumerate(guide, default=enumerate1) optimizer = pyro.optim.Adam({"lr": .001}) elbo = TraceEnum_ELBO(max_iarange_nesting=1, strict_enumeration_warning=any([enumerate1])) inference = SVI(model, guide, optimizer, loss=elbo) inference.step(data)
def assert_warning(model, guide, elbo): """ Assert that inference works but with a warning. """ pyro.clear_param_store() inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") inference.step() assert len(w), 'No warnings were raised' for warning in w: logger.info(warning)
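# A sketch of how the helpers above are used: pair a model/guide with an ELBO and assert the
# expected outcome. The tiny model/guide below are illustrative only, not from the original suite.
import torch
import pyro
import pyro.distributions as dist
from pyro.infer import Trace_ELBO

def tiny_model():
    pyro.sample("x", dist.Normal(0., 1.))

def tiny_guide():
    loc = pyro.param("loc", torch.tensor(0.))
    pyro.sample("x", dist.Normal(loc, 1.))

assert_ok(tiny_model, tiny_guide, Trace_ELBO())  # a well-formed pair runs cleanly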
def test_svi(Elbo, num_particles): pyro.clear_param_store() data = torch.arange(10) def model(data): loc = pyro.param("loc", torch.tensor(0.0)) scale = pyro.param("scale", torch.tensor(1.0), constraint=constraints.positive) pyro.sample("x", dist.Normal(loc, scale).expand_by(data.shape).independent(1), obs=data) def guide(data): pass elbo = Elbo(num_particles=num_particles, strict_enumeration_warning=False) inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo) for i in range(100): inference.step(data)
def main(args): pyro.set_rng_seed(0) pyro.enable_validation() optim = Adam({"lr": 0.1}) inference = SVI(model, guide, optim, loss=Trace_ELBO()) # Data is an arbitrary json-like structure with tensors at leaves. one = torch.tensor(1.0) data = { "foo": one, "bar": [0 * one, 1 * one, 2 * one], "baz": { "noun": { "concrete": 4 * one, "abstract": 6 * one, }, "verb": 2 * one, }, } print('Step\tLoss') loss = 0.0 for step in range(args.num_epochs): loss += inference.step(data) if step and step % 10 == 0: print('{}\t{:0.5g}'.format(step, loss)) loss = 0.0 print('Parameters:') for name in sorted(pyro.get_param_store().get_all_param_names()): print('{} = {}'.format(name, pyro.param(name).detach().cpu().numpy()))
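# The model/guide referenced above are not shown in this snippet. A minimal sketch of a model
# that consumes such a json-like structure: recurse over dicts and lists and observe every
# tensor leaf against a shared latent. All names below are illustrative assumptions.
import torch
import pyro
import pyro.distributions as dist
from torch.distributions import constraints

def model(data):
    loc = pyro.sample("loc", dist.Normal(0., 10.))
    def observe(prefix, node):
        if isinstance(node, torch.Tensor):
            pyro.sample("obs_{}".format(prefix), dist.Normal(loc, 1.), obs=node)
        elif isinstance(node, list):
            for i, child in enumerate(node):
                observe("{}_{}".format(prefix, i), child)
        elif isinstance(node, dict):
            for key, child in node.items():
                observe("{}_{}".format(prefix, key), child)
    observe("data", data)

def guide(data):
    loc = pyro.param("loc_q", torch.tensor(0.))
    scale = pyro.param("scale_q", torch.tensor(1.), constraint=constraints.positive)
    pyro.sample("loc", dist.Normal(loc, scale))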
def do_elbo_test(self, reparameterized, n_steps, lr, prec, difficulty=1.0): n_repa_nodes = torch.sum(self.which_nodes_reparam) if not reparameterized else self.N logger.info(" - - - - - DO GAUSSIAN %d-CHAIN ELBO TEST [reparameterized = %s; %d/%d] - - - - - " % (self.N, reparameterized, n_repa_nodes, self.N)) if self.N < 0: def array_to_string(y): return str(map(lambda x: "%.3f" % x.detach().cpu().numpy()[0], y)) logger.debug("lambdas: " + array_to_string(self.lambdas)) logger.debug("target_mus: " + array_to_string(self.target_mus[1:])) logger.debug("target_kappas: " + array_to_string(self.target_kappas[1:])) logger.debug("lambda_posts: " + array_to_string(self.lambda_posts[1:])) logger.debug("lambda_tilde_posts: " + array_to_string(self.lambda_tilde_posts)) pyro.clear_param_store() adam = optim.Adam({"lr": lr, "betas": (0.95, 0.999)}) elbo = TraceGraph_ELBO() loss_and_grads = elbo.loss_and_grads # loss_and_grads = elbo.jit_loss_and_grads # This fails. svi = SVI(self.model, self.guide, adam, loss=elbo.loss, loss_and_grads=loss_and_grads) for step in range(n_steps): t0 = time.time() svi.step(reparameterized=reparameterized, difficulty=difficulty) if step % 5000 == 0 or step == n_steps - 1: kappa_errors, log_sig_errors, loc_errors = [], [], [] for k in range(1, self.N + 1): if k != self.N: kappa_error = param_mse("kappa_q_%d" % k, self.target_kappas[k]) kappa_errors.append(kappa_error) loc_errors.append(param_mse("loc_q_%d" % k, self.target_mus[k])) log_sig_error = param_mse("log_sig_q_%d" % k, -0.5 * torch.log(self.lambda_posts[k])) log_sig_errors.append(log_sig_error) max_errors = (np.max(loc_errors), np.max(log_sig_errors), np.max(kappa_errors)) min_errors = (np.min(loc_errors), np.min(log_sig_errors), np.min(kappa_errors)) mean_errors = (np.mean(loc_errors), np.mean(log_sig_errors), np.mean(kappa_errors)) logger.debug("[max errors] (loc, log_scale, kappa) = (%.4f, %.4f, %.4f)" % max_errors) logger.debug("[min errors] (loc, log_scale, kappa) = (%.4f, %.4f, %.4f)" % min_errors) logger.debug("[mean errors] (loc, log_scale, kappa) = (%.4f, %.4f, %.4f)" % mean_errors) logger.debug("[step time = %.3f; N = %d; step = %d]\n" % (time.time() - t0, self.N, step)) assert_equal(0.0, max_errors[0], prec=prec) assert_equal(0.0, max_errors[1], prec=prec) assert_equal(0.0, max_errors[2], prec=prec)
def test_inference_deepGP(): gp1 = GPRegression(X, None, kernel, name="GPR1") Z, _ = gp1.model() gp2 = VariationalSparseGP(Z, y2D, Matern32(input_dim=3), Z.clone(), likelihood, name="GPR2") def model(): Z, _ = gp1.model() gp2.set_data(Z, y2D) gp2.model() def guide(): gp1.guide() gp2.guide() svi = SVI(model, guide, optim.Adam({}), Trace_ELBO()) svi.step()
def test_irange_smoke(auto_class, Elbo): def model(): x = pyro.sample("x", dist.Normal(0, 1)) assert x.shape == () for i in pyro.irange("irange", 3): y = pyro.sample("y_{}".format(i), dist.Normal(0, 1).expand_by([2, 1 + i, 2]).independent(3)) assert y.shape == (2, 1 + i, 2) z = pyro.sample("z", dist.Normal(0, 1).expand_by([2]).independent(1)) assert z.shape == (2,) pyro.sample("obs", dist.Bernoulli(0.1), obs=torch.tensor(0)) guide = auto_class(model) infer = SVI(model, guide, Adam({"lr": 1e-6}), Elbo(strict_enumeration_warning=False)) infer.step()
def test_median(auto_class, Elbo): def model(): pyro.sample("x", dist.Normal(0.0, 1.0)) pyro.sample("y", dist.LogNormal(0.0, 1.0)) pyro.sample("z", dist.Beta(2.0, 2.0)) guide = auto_class(model) infer = SVI(model, guide, Adam({'lr': 0.05}), Elbo(strict_enumeration_warning=False)) for _ in range(100): infer.step() median = guide.median() assert_equal(median["x"], torch.tensor(0.0), prec=0.1) if auto_class is AutoDelta: assert_equal(median["y"], torch.tensor(-1.0).exp(), prec=0.1) else: assert_equal(median["y"], torch.tensor(1.0), prec=0.1) assert_equal(median["z"], torch.tensor(0.5), prec=0.1)
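# Why the AutoDelta branch above expects exp(-1) rather than 1 for "y": AutoDelta fits a MAP
# point estimate, and the mode of LogNormal(0, 1) is exp(mu - sigma^2) = exp(-1), whereas the
# true median (recovered by the other autoguides) is exp(mu) = 1.
import torch
mode = torch.tensor(0. - 1. ** 2).exp()  # exp(mu - sigma^2) ~= 0.368
median = torch.tensor(0.).exp()          # exp(mu) = 1.0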
def test_elbo_with_transformed_distribution(self): if self.verbose: print(" - - - - - DO LOGNORMAL-NORMAL ELBO TEST [uses TransformedDistribution] - - - - - ") pyro.clear_param_store() def model(): mu_latent = pyro.sample("mu_latent", dist.normal, self.mu0, torch.pow(self.tau0, -0.5)) bijector = AffineExp(torch.pow(self.tau, -0.5), mu_latent) x_dist = TransformedDistribution(dist.normal, bijector) pyro.observe("obs0", x_dist, self.data[0], ng_zeros(1), ng_ones(1)) pyro.observe("obs1", x_dist, self.data[1], ng_zeros(1), ng_ones(1)) return mu_latent def guide(): mu_q_log = pyro.param( "mu_q_log", Variable( self.log_mu_n.data + 0.17, requires_grad=True)) tau_q_log = pyro.param("tau_q_log", Variable(self.log_tau_n.data - 0.143, requires_grad=True)) mu_q, tau_q = torch.exp(mu_q_log), torch.exp(tau_q_log) pyro.sample("mu_latent", dist.normal, mu_q, torch.pow(tau_q, -0.5)) adam = optim.Adam({"lr": 0.001, "betas": (0.95, 0.999)}) svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True) for k in range(7000): svi.step() mu_error = param_abs_error("mu_q_log", self.log_mu_n) tau_error = param_abs_error("tau_q_log", self.log_tau_n) if k % 500 == 0 and self.verbose: print("mu_error, tau_error = %.4f, %.4f" % (mu_error, tau_error)) self.assertEqual(0.0, mu_error, prec=0.05) self.assertEqual(0.0, tau_error, prec=0.05)
def test_elbo_nonreparameterized(self): if self.verbose: print(" - - - - - DO POISSON-GAMMA ELBO TEST - - - - - ") pyro.clear_param_store() def model(): lambda_latent = pyro.sample("lambda_latent", dist.gamma, self.alpha0, self.beta0) for i, x in enumerate(self.data): pyro.observe("obs_{}".format(i), dist.poisson, x, lambda_latent) return lambda_latent def guide(): alpha_q_log = pyro.param( "alpha_q_log", Variable( self.log_alpha_n.data + 0.17, requires_grad=True)) beta_q_log = pyro.param( "beta_q_log", Variable( self.log_beta_n.data - 0.143, requires_grad=True)) alpha_q, beta_q = torch.exp(alpha_q_log), torch.exp(beta_q_log) pyro.sample("lambda_latent", dist.gamma, alpha_q, beta_q, baseline=dict(use_decaying_avg_baseline=True)) adam = optim.Adam({"lr": .0007, "betas": (0.95, 0.999)}) svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True) for k in range(7000): svi.step() alpha_error = param_abs_error("alpha_q_log", self.log_alpha_n) beta_error = param_abs_error("beta_q_log", self.log_beta_n) if k % 500 == 0 and self.verbose: print("alpha_q_log_error, beta_q_log_error: %.4f, %.4f" % (alpha_error, beta_error)) self.assertEqual(0.0, alpha_error, prec=0.08) self.assertEqual(0.0, beta_error, prec=0.08)
def do_elbo_test(self, reparameterized, n_steps): if self.verbose: print(" - - - - - DO NORMALNORMAL ELBO TEST [reparameterized = %s] - - - - - " % reparameterized) pyro.clear_param_store() def model(): mu_latent = pyro.sample( "mu_latent", dist.Normal(self.mu0, torch.pow(self.lam0, -0.5), reparameterized=reparameterized)) for i, x in enumerate(self.data): pyro.observe("obs_%d" % i, dist.normal, x, mu_latent, torch.pow(self.lam, -0.5)) return mu_latent def guide(): mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.334 * torch.ones(2), requires_grad=True)) log_sig_q = pyro.param("log_sig_q", Variable( self.analytic_log_sig_n.data - 0.29 * torch.ones(2), requires_grad=True)) sig_q = torch.exp(log_sig_q) mu_latent = pyro.sample("mu_latent", dist.Normal(mu_q, sig_q, reparameterized=reparameterized), baseline=dict(use_decaying_avg_baseline=True)) return mu_latent adam = optim.Adam({"lr": .0015, "betas": (0.97, 0.999)}) svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True) for k in range(n_steps): svi.step() mu_error = param_mse("mu_q", self.analytic_mu_n) log_sig_error = param_mse("log_sig_q", self.analytic_log_sig_n) if k % 250 == 0 and self.verbose: print("mu error, log(sigma) error: %.4f, %.4f" % (mu_error, log_sig_error)) self.assertEqual(0.0, mu_error, prec=0.03) self.assertEqual(0.0, log_sig_error, prec=0.03)
def do_elbo_test(self, reparameterized, n_steps, beta1, lr): if self.verbose: print(" - - - - - DO LOGNORMAL-NORMAL ELBO TEST [repa = %s] - - - - - " % reparameterized) pyro.clear_param_store() pt_guide = LogNormalNormalGuide(self.log_mu_n.data + 0.17, self.log_tau_n.data - 0.143) def model(): mu_latent = pyro.sample("mu_latent", dist.normal, self.mu0, torch.pow(self.tau0, -0.5)) sigma = torch.pow(self.tau, -0.5) pyro.observe("obs0", dist.lognormal, self.data[0], mu_latent, sigma) pyro.observe("obs1", dist.lognormal, self.data[1], mu_latent, sigma) return mu_latent def guide(): pyro.module("mymodule", pt_guide) mu_q, tau_q = torch.exp(pt_guide.mu_q_log), torch.exp(pt_guide.tau_q_log) sigma = torch.pow(tau_q, -0.5) pyro.sample("mu_latent", dist.Normal(mu_q, sigma, reparameterized=reparameterized), baseline=dict(use_decaying_avg_baseline=True)) adam = optim.Adam({"lr": lr, "betas": (beta1, 0.999)}) svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True) for k in range(n_steps): svi.step() mu_error = param_abs_error("mymodule$$$mu_q_log", self.log_mu_n) tau_error = param_abs_error("mymodule$$$tau_q_log", self.log_tau_n) if k % 500 == 0 and self.verbose: print("mu_error, tau_error = %.4f, %.4f" % (mu_error, tau_error)) self.assertEqual(0.0, mu_error, prec=0.05) self.assertEqual(0.0, tau_error, prec=0.05)
def main(args): pyro.set_rng_seed(0) pyro.enable_validation() optim = Adam({"lr": 0.1}) inference = SVI(model, guide, optim, loss=Trace_ELBO()) data = torch.tensor([0.0, 1.0, 2.0, 20.0, 30.0, 40.0]) k = 2 print('Step\tLoss') loss = 0.0 for step in range(args.num_epochs): if step and step % 10 == 0: print('{}\t{:0.5g}'.format(step, loss)) loss = 0.0 loss += inference.step(data, k) print('Parameters:') for name in sorted(pyro.get_param_store().get_all_param_names()): print('{} = {}'.format(name, pyro.param(name).detach().cpu().numpy()))
def do_test_per_param_optim(self, fixed_param, free_param): pyro.clear_param_store() def model(): prior_dist = Normal(self.mu0, torch.pow(self.lam0, -0.5)) mu_latent = pyro.sample("mu_latent", prior_dist) x_dist = Normal(mu_latent, torch.pow(self.lam, -0.5)) pyro.observe("obs", x_dist, self.data) return mu_latent def guide(): mu_q = pyro.param( "mu_q", Variable( torch.zeros(1), requires_grad=True)) log_sig_q = pyro.param( "log_sig_q", Variable( torch.zeros(1), requires_grad=True)) sig_q = torch.exp(log_sig_q) pyro.sample("mu_latent", Normal(mu_q, sig_q)) def optim_params(module_name, param_name, tags): if param_name == fixed_param: return {'lr': 0.00} elif param_name == free_param: return {'lr': 0.01} adam = optim.Adam(optim_params) adam2 = optim.Adam(optim_params) svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True) svi2 = SVI(model, guide, adam2, loss="ELBO", trace_graph=True) svi.step() adam_initial_step_count = list(adam.get_state()['mu_q']['state'].items())[0][1]['step'] adam.save('adam.unittest.save') svi.step() adam_final_step_count = list(adam.get_state()['mu_q']['state'].items())[0][1]['step'] adam2.load('adam.unittest.save') svi2.step() adam2_step_count_after_load_and_step = list(adam2.get_state()['mu_q']['state'].items())[0][1]['step'] assert adam_initial_step_count == 1 assert adam_final_step_count == 2 assert adam2_step_count_after_load_and_step == 2 free_param_unchanged = torch.equal(pyro.param(free_param).data, torch.zeros(1)) fixed_param_unchanged = torch.equal(pyro.param(fixed_param).data, torch.zeros(1)) assert fixed_param_unchanged and not free_param_unchanged
def main(args): # load data print('loading training data...') if not os.path.exists('faces_training.csv'): wget.download('https://d2fefpcigoriu7.cloudfront.net/datasets/faces_training.csv', 'faces_training.csv') data = torch.tensor(np.loadtxt('faces_training.csv', delimiter=',')).float() sparse_gamma_def = SparseGammaDEF() opt = optim.AdagradRMSProp({"eta": 4.5, "t": 0.1}) svi = SVI(sparse_gamma_def.model, sparse_gamma_def.guide, opt, loss=Trace_ELBO()) print('\nbeginning training...') # the training loop for k in range(args.num_epochs): loss = svi.step(data) sparse_gamma_def.clip_params() # we clip params after each gradient step if k % 20 == 0 and k > 0: print("[epoch %04d] training elbo: %.4g" % (k, -loss))
def main(args): pyro.set_rng_seed(0) pyro.clear_param_store() K = 2 data = torch.tensor([0.0, 1.0, 2.0, 20.0, 30.0, 40.0]) optim = pyro.optim.Adam({'lr': 0.1}) inference = SVI(model, config_enumerate(guide), optim, loss=TraceEnum_ELBO(max_plate_nesting=1)) print('Step\tLoss') loss = 0.0 for step in range(args.num_epochs): if step and step % 10 == 0: print('{}\t{:0.5g}'.format(step, loss)) loss = 0.0 loss += inference.step(K, data) print('Parameters:') for name, value in sorted(pyro.get_param_store().items()): print('{} = {}'.format(name, value.detach().cpu().numpy()))
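# The model/guide pair driven above is not included in this snippet. A compact sketch of a
# K-component Gaussian mixture that fits the call signature step(K, data), with the assignment
# site enumerated by TraceEnum_ELBO via config_enumerate(guide); the priors and parameter names
# below are illustrative rather than the exact ones from the original example.
import torch
import pyro
import pyro.distributions as dist
from torch.distributions import constraints

def model(K, data):
    weights = pyro.sample("weights", dist.Dirichlet(0.5 * torch.ones(K)))
    locs = pyro.sample("locs", dist.Normal(0., 10.).expand([K]).to_event(1))
    scale = pyro.sample("scale", dist.LogNormal(0., 2.))
    with pyro.plate("data", len(data)):
        assignment = pyro.sample("assignment", dist.Categorical(weights))
        pyro.sample("obs", dist.Normal(locs[assignment], scale), obs=data)

def guide(K, data):
    # point-estimate guide for the globals; per-datum assignment probabilities get enumerated
    weights_q = pyro.param("weights_q", torch.ones(K) / K, constraint=constraints.simplex)
    locs_q = pyro.param("locs_q", torch.randn(K) * 10.)
    scale_q = pyro.param("scale_q", torch.tensor(1.), constraint=constraints.positive)
    pyro.sample("weights", dist.Delta(weights_q).to_event(1))
    pyro.sample("locs", dist.Delta(locs_q).to_event(1))
    pyro.sample("scale", dist.Delta(scale_q))
    with pyro.plate("data", len(data)):
        probs = pyro.param("assignment_probs", torch.ones(len(data), K) / K,
                           constraint=constraints.simplex)
        pyro.sample("assignment", dist.Categorical(probs))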
def optimize(self, optimizer=None, loss=None, num_steps=1000): """ A convenient method to optimize parameters for the Gaussian Process model using :class:`~pyro.infer.svi.SVI`. :param PyroOptim optimizer: A Pyro optimizer. :param ELBO loss: A Pyro loss instance. :param int num_steps: Number of steps to run SVI. :returns: a list of losses during the training procedure :rtype: list """ if optimizer is None: optimizer = Adam({}) if not isinstance(optimizer, PyroOptim): raise ValueError("Optimizer should be an instance of " "pyro.optim.PyroOptim class.") if loss is None: loss = Trace_ELBO() svi = SVI(self.model, self.guide, optimizer, loss=loss) losses = [] for i in range(num_steps): losses.append(svi.step()) return losses
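# A minimal usage sketch for the convenience method above, on a tiny synthetic dataset
# (the kernel, optimizer, and step count here are illustrative choices, not defaults):
import torch
from pyro.contrib.gp.models import GPRegression
from pyro.contrib.gp.kernels import RBF
from pyro.infer import Trace_ELBO
from pyro.optim import Adam

X = torch.linspace(0., 5., 20)
y = torch.sin(X) + 0.1 * torch.randn(20)
gpr = GPRegression(X, y, RBF(input_dim=1))
losses = gpr.optimize(optimizer=Adam({"lr": 0.01}), loss=Trace_ELBO(), num_steps=100)
print(losses[0], losses[-1])  # the loss should generally decrease over training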
def fit(self, X, Y, verbose=False): optim = Adam({ "lr": self.weight_decay, "weight_decay": self.weight_decay }) svi = SVI(self.model, self.guide, optim, loss=Trace_ELBO()) data = np.concatenate((X, Y), axis=1) data = Variable(torch.from_numpy(data).type(torch.FloatTensor)) loss_log = [] for epoch in range(self.epochs): loss = svi.step(data) loss_log.append(loss) if verbose: if epoch % 100 == 0: print('Epoch {} loss: {}'.format(epoch + 1, loss)) # Save best model if loss <= min(loss_log): self.best_model = self.guide
def test_auto_dirichlet(auto_class, Elbo): num_steps = 2000 prior = torch.tensor([0.5, 1.0, 1.5, 3.0]) data = torch.tensor([0] * 4 + [1] * 2 + [2] * 5).long() posterior = torch.tensor([4.5, 3.0, 6.5, 3.0]) def model(data): p = pyro.sample("p", dist.Dirichlet(prior)) with pyro.plate("data_plate"): pyro.sample("data", dist.Categorical(p).expand_by(data.shape), obs=data) guide = auto_class(model) svi = SVI(model, guide, optim.Adam({"lr": .003}), loss=Elbo()) for _ in range(num_steps): loss = svi.step(data) assert np.isfinite(loss), loss expected_mean = posterior / posterior.sum() actual_mean = biject_to(constraints.simplex)(guide.loc) assert_equal(actual_mean, expected_mean, prec=0.2, msg=''.join([ '\nexpected {}'.format(expected_mean.detach().cpu().numpy()), '\n actual {}'.format(actual_mean.detach().cpu().numpy())]))
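# For reference, the mapping used in the final assertion above: the autoguide stores an
# unconstrained location vector, and biject_to(constraints.simplex) (a stick-breaking
# transform) maps it back onto the probability simplex. A standalone illustration:
import torch
from torch.distributions import biject_to, constraints

unconstrained = torch.zeros(3)                          # K - 1 = 3 free dimensions
point = biject_to(constraints.simplex)(unconstrained)   # 4 nonnegative entries
assert torch.allclose(point.sum(), torch.tensor(1.0))   # sums to one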
def fit(self, x: torch.Tensor) -> MixtureModel: def init_loc_fn(site): K = self.num_components if site["name"] == "weights": return torch.ones(K) / K if site["name"] == "scales": return torch.tensor([[(x.var() / 2).sqrt()] * 2] * K) if site["name"] == "locs": return x[torch.multinomial(torch.ones(x.shape[0]) / x.shape[0], K), :] raise ValueError(site["name"]) self.guide = AutoDelta(poutine.block(self.model, expose=['weights', 'locs', 'scales']), init_loc_fn=init_loc_fn) optim = pyro.optim.Adam({'lr': 0.1, 'betas': [0.8, 0.99]}) loss = TraceEnum_ELBO(max_plate_nesting=1) svi = SVI(self.model, self.guide, optim, loss=loss) for i in range(self.optim_steps): elbo = svi.step(x) self.history["loss"].append(elbo) return self
def main(_argv): transition_alphas = torch.tensor([[10., 90.], [90., 10.]]) emission_alphas = torch.tensor([[[30., 20., 5.]], [[5., 10., 100.]]]) lengths = torch.randint(10, 30, (10000,)) trace = poutine.trace(model).get_trace(transition_alphas, emission_alphas, lengths) obs_sequences = [site['value'] for name, site in trace.nodes.items() if name.startswith("element_")] obs_sequences = torch.stack(obs_sequences, dim=-2) guide = AutoDelta(poutine.block(model, hide_fn=lambda site: site['name'].startswith('state')), init_loc_fn=init_to_sample) svi = SVI(model, guide, Adam(dict(lr=0.1)), JitTraceEnum_ELBO()) total = 1000 with tqdm.trange(total) as t: for i in t: loss = svi.step(0.5 * torch.ones((2, 2), dtype=torch.float), 0.3 * torch.ones((2, 1, 3), dtype=torch.float), lengths, obs_sequences) t.set_description_str(f"SVI ({i}/{total}): {loss}") median = guide.median() print("Transition probs: ", median['transition_probs'].detach().numpy()) print("Emission probs: ", median['emission_probs'].squeeze().detach().numpy())
def all_bands(priors, lr=0.005, n_steps=1000, n_samples=1000, verbose=True, sub=1): from pyro.infer import Predictive pyro.clear_param_store() guide = AutoMultivariateNormal(spire_model, init_loc_fn=init_to_mean) svi = SVI(spire_model, guide, optim.Adam({"lr": lr}), loss=Trace_ELBO()) loss_history = [] for i in range(n_steps): loss = svi.step(priors, sub=sub) if (i % 100 == 0) and verbose: print('ELBO loss: {}'.format(loss)) loss_history.append(loss) print('ELBO loss: {}'.format(loss)) predictive = Predictive(spire_model, guide=guide, num_samples=n_samples) samples = { k: v.squeeze(-1).detach().cpu().numpy() for k, v in predictive(priors).items() if k != "obs" } f_low_lim = torch.tensor([p.prior_flux_lower for p in priors], dtype=torch.float) f_up_lim = torch.tensor([p.prior_flux_upper for p in priors], dtype=torch.float) f_vec_multi = (f_up_lim - f_low_lim) * samples['src_f'][..., :, :] + f_low_lim samples['src_f'] = f_vec_multi.squeeze(-3).numpy() samples['sigma_conf'] = samples['sigma_conf'].squeeze(-1).squeeze(-2) samples['bkg'] = samples['bkg'].squeeze(-1).squeeze(-2) return {'loss_history': loss_history, 'samples': samples}
def fit(self, T, W, X, Y, S=1e-7): data = self.get_data_dict(T, W, X, Y, S) lr = self.lr svi = SVI(self.model, self.guide, Adam({'lr': lr}), loss=Trace_ELBO()) lc = [] lt = [] pyro.set_rng_seed(0) pyro.clear_param_store() if self.notebook: from tqdm.notebook import tqdm else: from tqdm import tqdm for i in tqdm(range(self.n_iter)): elbo = svi.step(data) lt.append(elbo) if i and not i % (self.n_iter // self.n_stp): lr *= .1 svi = SVI(self.model, self.guide, Adam({'lr': lr}), loss=Trace_ELBO()) if not i % (self.n_iter // 20): with torch.no_grad(): lc.append(sum(lt) / len(lt)) lt = [] pars = self.guide() distr = self.get_distr(data, pars) llk = distr.log_prob(data['Y']).mean().item() r2 = np.corrcoef(distr.mean.view(-1), data['Y'].view(-1))[0, 1]**2 print( '%d\t\tELBO: %.2E - LLK: %.2E - r2: %.3f - lr: %.2E' % (i, elbo, llk, r2, lr)) self.lrn_crvs.append((i, elbo, llk, r2, lr))
def _train_full_data(self, x_data, obs2sample, n_epochs=20000, lr=0.002): idx = np.arange(x_data.shape[0]).astype("int64") device = torch.device("cuda") idx = torch.tensor(idx).to(device) x_data = torch.tensor(x_data).to(device) obs2sample = torch.tensor(obs2sample).to(device) self.to(device) pyro.clear_param_store() self.guide(x_data, idx, obs2sample) svi = SVI( self.model, self.guide, optim.ClippedAdam({ "lr": lr, "clip_norm": 200 }), loss=Trace_ELBO(), ) iter_iterator = tqdm(range(n_epochs)) hist = [] for it in iter_iterator: loss = svi.step(x_data, idx, obs2sample) iter_iterator.set_description("Epoch " + "{:d}".format(it) + ", -ELBO: " + "{:.4e}".format(loss)) hist.append(loss) if it % 500 == 0: torch.cuda.empty_cache() self.hist = hist
def test_ss_mle(dim, dist): base_dist = dist[0](*(torch.tensor(param).expand((dim, )) for param in dist[1])).to_event(1) skewness_tar = _skewness(base_dist.event_shape) data = SineSkewed(base_dist, skewness_tar).sample((1000, )) def model(data, batch_shape): skews = [] for i in range(dim): skews.append( pyro.param( f"skew{i}", 0.5 * torch.ones(batch_shape), constraint=constraints.interval(-1, 1), )) skewness = torch.stack(skews, dim=-1) with pyro.plate("data", data.size(-len(data.size()))): pyro.sample("obs", SineSkewed(base_dist, skewness), obs=data) def guide(data, batch_shape): pass pyro.clear_param_store() adam = Adam({"lr": 0.1}) svi = SVI(model, guide, adam, loss=Trace_ELBO()) losses = [] steps = 80 for step in range(steps): losses.append(svi.step(data, base_dist.batch_shape)) act_skewness = torch.stack( [v for k, v in pyro.get_param_store().items() if "skew" in k], dim=-1) assert_equal(act_skewness, skewness_tar, 1e-1)
def main(args): logging.info('Generating data') pyro.set_rng_seed(0) pyro.clear_param_store() # We can generate synthetic data directly by calling the model. true_topic_weights, true_topic_words, data = model(args=args) # We'll train using SVI. logging.info('-' * 40) logging.info('Training on {} documents'.format(args.num_docs)) predictor = make_predictor(args) guide = functools.partial(parametrized_guide, predictor) Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO elbo = Elbo(max_plate_nesting=2) optim = ClippedAdam({'lr': args.learning_rate}) svi = SVI(model, guide, optim, elbo) logging.info('Step\tLoss') for step in range(args.num_steps): loss = svi.step(data, args=args, batch_size=args.batch_size) if step % 10 == 0: logging.info('{: >5d}\t{}'.format(step, loss)) loss = elbo.loss(model, guide, data, args=args) logging.info('final loss = {}'.format(loss))
def test_non_nested_plating_sum(): """Example from https://github.com/pyro-ppl/pyro/issues/2361""" # Generative model: data = x @ weights + eps def model(data, weights): loc = torch.tensor(1.0) scale = torch.tensor(0.1) # Sample latents (shares no dimensions with data) with pyro.plate("x_plate", weights.shape[0]): x = pyro.sample("x", pyro.distributions.Normal(loc, scale)) # Combine with weights and sample with pyro.plate("data_plate_1", data.shape[-1]): with pyro.plate("data_plate_2", data.shape[-2]): pyro.sample("data", pyro.distributions.Normal(x @ weights, scale), obs=data) def guide(data, weights): loc = pyro.param("x_loc", torch.tensor(0.5)) scale = torch.tensor(0.1) with pyro.plate("x_plate", weights.shape[0]): pyro.sample("x", pyro.distributions.Normal(loc, scale)) data = torch.randn([5, 3]) weights = torch.randn([2, 3]) adam = optim.Adam({"lr": 0.01}) loss_fn = RenyiELBO(num_particles=30, vectorize_particles=True) svi = SVI(model, guide, adam, loss_fn) for step in range(1): loss = svi.step(data, weights) if step % 20 == 0: logger.info("step {} loss = {:0.4g}".format(step, loss))
def infer_posterior(self, iter_steps=10000, num_particles=100, optim_kwargs={'lr': .01}): """Perform SVI over free model parameters. """ clear_param_store() svi = SVI(model=self.model, guide=self.guide, optim=Adam(optim_kwargs), loss=TraceEnum_ELBO(num_particles=num_particles, vectorize_particles=True)) loss = [] pbar = tqdm(range(iter_steps), position=0) for step in pbar: loss.append(svi.step()) pbar.set_description("Mean ELBO %6.2f" % tensor(loss[-20:]).mean()) if np.isnan(loss[-1]): break self.loss = loss
def test_sequential_plating_sum(): """Example from https://github.com/pyro-ppl/pyro/issues/2361""" def model(data): x = pyro.sample("x", dist.Bernoulli(torch.tensor(0.5))) for i in pyro.plate("data_plate", len(data)): pyro.sample( "data_{:d}".format(i), dist.Normal(x, scale=torch.tensor(0.1)), obs=data[i], ) def guide(data): p = pyro.param("p", torch.tensor(0.5)) pyro.sample("x", pyro.distributions.Bernoulli(p)) data = torch.cat([torch.randn([5]), 1.0 + torch.randn([5])]) adam = optim.Adam({"lr": 0.01}) loss_fn = RenyiELBO(alpha=0, num_particles=30, vectorize_particles=True) svi = SVI(model, guide, adam, loss_fn) for step in range(1): loss = svi.step(data) if step % 20 == 0: logger.info("step {} loss = {:0.4g}".format(step, loss))
def test_reparam_stable(): data = dist.Poisson(torch.randn(8).exp()).sample() @poutine.reparam(config={"dz": LatentStableReparam(), "y": LatentStableReparam()}) def model(): stability = pyro.sample("stability", dist.Uniform(1., 2.)) trans_skew = pyro.sample("trans_skew", dist.Uniform(-1., 1.)) obs_skew = pyro.sample("obs_skew", dist.Uniform(-1., 1.)) scale = pyro.sample("scale", dist.Gamma(3, 1)) # We use separate plates because the .cumsum() op breaks independence. with pyro.plate("time1", len(data)): dz = pyro.sample("dz", dist.Stable(stability, trans_skew)) z = dz.cumsum(-1) with pyro.plate("time2", len(data)): y = pyro.sample("y", dist.Stable(stability, obs_skew, scale, z)) pyro.sample("x", dist.Poisson(y.abs()), obs=data) guide = AutoDelta(model) svi = SVI(model, guide, optim.Adam({"lr": 0.01}), Trace_ELBO()) for step in range(100): loss = svi.step() if step % 20 == 0: logger.info("step {} loss = {:0.4g}".format(step, loss))
def main(args): if args.cuda: torch.set_default_tensor_type('torch.cuda.FloatTensor') logging.info('Loading data') data = poly.load_data(poly.JSB_CHORALES) logging.info('-' * 40) model = models[args.model] logging.info('Training {} on {} sequences'.format( model.__name__, len(data['train']['sequences']))) sequences = data['train']['sequences'] lengths = data['train']['sequence_lengths'] # find all the notes that are present at least once in the training set present_notes = ((sequences == 1).sum(0).sum(0) > 0) # remove notes that are never played (we remove 37/88 notes) sequences = sequences[..., present_notes] if args.truncate: lengths.clamp_(max=args.truncate) sequences = sequences[:, :args.truncate] num_observations = float(lengths.sum()) pyro.set_rng_seed(0) pyro.clear_param_store() pyro.enable_validation(True) # We'll train using MAP Baum-Welch, i.e. MAP estimation while marginalizing # out the hidden state x. This is accomplished via an automatic guide that # learns point estimates of all of our conditional probability tables, # named probs_*. guide = AutoDelta( poutine.block(model, expose_fn=lambda msg: msg["name"].startswith("probs_"))) # To help debug our tensor shapes, let's print the shape of each site's # distribution, value, and log_prob tensor. Note this information is # automatically printed on most errors inside SVI. if args.print_shapes: first_available_dim = -2 if model is model_0 else -3 guide_trace = poutine.trace(guide).get_trace( sequences, lengths, args=args, batch_size=args.batch_size) model_trace = poutine.trace( poutine.replay(poutine.enum(model, first_available_dim), guide_trace)).get_trace(sequences, lengths, args=args, batch_size=args.batch_size) logging.info(model_trace.format_shapes()) # Enumeration requires a TraceEnum elbo and declaring the max_plate_nesting. # All of our models have two plates: "data" and "tones". Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO elbo = Elbo(max_plate_nesting=1 if model is model_0 else 2) optim = Adam({'lr': args.learning_rate}) svi = SVI(model, guide, optim, elbo) # We'll train on small minibatches. logging.info('Step\tLoss') for step in range(args.num_steps): loss = svi.step(sequences, lengths, args=args, batch_size=args.batch_size) logging.info('{: >5d}\t{}'.format(step, loss / num_observations)) # We evaluate on the entire training dataset, # excluding the prior term so our results are comparable across models. train_loss = elbo.loss(model, guide, sequences, lengths, args, include_prior=False) logging.info('training loss = {}'.format(train_loss / num_observations)) # Finally we evaluate on the test dataset. logging.info('-' * 40) logging.info('Evaluating on {} test sequences'.format( len(data['test']['sequences']))) sequences = data['test']['sequences'][..., present_notes] lengths = data['test']['sequence_lengths'] if args.truncate: lengths.clamp_(max=args.truncate) num_observations = float(lengths.sum()) # note that since we removed unseen notes above (to make the problem a bit easier and for # numerical stability) this test loss may not be directly comparable to numbers # reported on this dataset elsewhere. test_loss = elbo.loss(model, guide, sequences, lengths, args=args, include_prior=False) logging.info('test loss = {}'.format(test_loss / num_observations)) # We expect models with higher capacity to perform better, # but eventually overfit to the training set. 
capacity = sum( value.reshape(-1).size(0) for value in pyro.get_param_store().values()) logging.info('{} capacity = {} parameters'.format(model.__name__, capacity))
def main(args): if args.cuda: torch.set_default_tensor_type("torch.cuda.FloatTensor") logging.info("Loading data") data = poly.load_data(poly.JSB_CHORALES) logging.info("-" * 40) model = models[args.model] logging.info("Training {} on {} sequences".format( model.__name__, len(data["train"]["sequences"]))) sequences = data["train"]["sequences"] lengths = data["train"]["sequence_lengths"] # find all the notes that are present at least once in the training set present_notes = (sequences == 1).sum(0).sum(0) > 0 # remove notes that are never played (we remove 37/88 notes) sequences = sequences[..., present_notes] if args.truncate: lengths = lengths.clamp(max=args.truncate) sequences = sequences[:, :args.truncate] num_observations = float(lengths.sum()) pyro.set_rng_seed(args.seed) pyro.clear_param_store() # We'll train using MAP Baum-Welch, i.e. MAP estimation while marginalizing # out the hidden state x. This is accomplished via an automatic guide that # learns point estimates of all of our conditional probability tables, # named probs_*. guide = AutoDelta( poutine.block(model, expose_fn=lambda msg: msg["name"].startswith("probs_"))) # To help debug our tensor shapes, let's print the shape of each site's # distribution, value, and log_prob tensor. Note this information is # automatically printed on most errors inside SVI. if args.print_shapes: first_available_dim = -2 if model is model_0 else -3 guide_trace = poutine.trace(guide).get_trace( sequences, lengths, args=args, batch_size=args.batch_size) model_trace = poutine.trace( poutine.replay(poutine.enum(model, first_available_dim), guide_trace)).get_trace(sequences, lengths, args=args, batch_size=args.batch_size) logging.info(model_trace.format_shapes()) # Enumeration requires a TraceEnum elbo and declaring the max_plate_nesting. # All of our models have two plates: "data" and "tones". optim = Adam({"lr": args.learning_rate}) if args.tmc: if args.jit: raise NotImplementedError( "jit support not yet added for TraceTMC_ELBO") elbo = TraceTMC_ELBO(max_plate_nesting=1 if model is model_0 else 2) tmc_model = poutine.infer_config( model, lambda msg: { "num_samples": args.tmc_num_samples, "expand": False } if msg["infer"].get("enumerate", None) == "parallel" else {}, ) # noqa: E501 svi = SVI(tmc_model, guide, optim, elbo) else: Elbo = JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO elbo = Elbo( max_plate_nesting=1 if model is model_0 else 2, strict_enumeration_warning=(model is not model_7), jit_options={"time_compilation": args.time_compilation}, ) svi = SVI(model, guide, optim, elbo) # We'll train on small minibatches. logging.info("Step\tLoss") for step in range(args.num_steps): loss = svi.step(sequences, lengths, args=args, batch_size=args.batch_size) logging.info("{: >5d}\t{}".format(step, loss / num_observations)) if args.jit and args.time_compilation: logging.debug("time to compile: {} s.".format( elbo._differentiable_loss.compile_time)) # We evaluate on the entire training dataset, # excluding the prior term so our results are comparable across models. train_loss = elbo.loss(model, guide, sequences, lengths, args, include_prior=False) logging.info("training loss = {}".format(train_loss / num_observations)) # Finally we evaluate on the test dataset. 
logging.info("-" * 40) logging.info("Evaluating on {} test sequences".format( len(data["test"]["sequences"]))) sequences = data["test"]["sequences"][..., present_notes] lengths = data["test"]["sequence_lengths"] if args.truncate: lengths = lengths.clamp(max=args.truncate) num_observations = float(lengths.sum()) # note that since we removed unseen notes above (to make the problem a bit easier and for # numerical stability) this test loss may not be directly comparable to numbers # reported on this dataset elsewhere. test_loss = elbo.loss(model, guide, sequences, lengths, args=args, include_prior=False) logging.info("test loss = {}".format(test_loss / num_observations)) # We expect models with higher capacity to perform better, # but eventually overfit to the training set. capacity = sum( value.reshape(-1).size(0) for value in pyro.get_param_store().values()) logging.info("{} capacity = {} parameters".format(model.__name__, capacity))
# Prepare training data df = rugged_data[["cont_africa", "rugged", "rgdppc_2000"]] df = df[np.isfinite(df.rgdppc_2000)] df["rgdppc_2000"] = np.log(df["rgdppc_2000"]) train = torch.tensor(df.values, dtype=torch.float) svi = SVI(model, guide, optim.Adam({"lr": .005}), loss=Trace_ELBO(), num_samples=1000) is_cont_africa, ruggedness, log_gdp = train[:, 0], train[:, 1], train[:, 2] pyro.clear_param_store() num_iters = 8000 if not smoke_test else 2 for i in range(num_iters): elbo = svi.step(is_cont_africa, ruggedness, log_gdp) if i % 500 == 0: logging.info("Elbo loss: {}".format(elbo)) posterior = svi.run(log_gdp, is_cont_africa, ruggedness) sites = ["a", "bA", "bR", "bAR", "sigma"] for site, values in summary(posterior, sites).items(): print("Site: {}".format(site)) print(values, "\n") def wrapped_model(is_cont_africa, ruggedness, log_gdp): pyro.sample("prediction", Delta(model(is_cont_africa, ruggedness, log_gdp)))
def main(**kwargs): args = argparse.Namespace(**kwargs) if 'save' in args: if os.path.exists(args.save): raise RuntimeError('Output file "{}" already exists.'.format(args.save)) if args.seed is not None: pyro.set_rng_seed(args.seed) X, true_counts = load_data() X_size = X.size(0) if args.cuda: X = X.cuda() # Build a function to compute z_pres prior probabilities. if args.z_pres_prior_raw: def base_z_pres_prior_p(t): return args.z_pres_prior else: base_z_pres_prior_p = make_prior(args.z_pres_prior) # Wrap with logic to apply any annealing. def z_pres_prior_p(opt_step, time_step): p = base_z_pres_prior_p(time_step) if args.anneal_prior == 'none': return p else: decay = dict(lin=lin_decay, exp=exp_decay)[args.anneal_prior] return decay(p, args.anneal_prior_to, args.anneal_prior_begin, args.anneal_prior_duration, opt_step) model_arg_keys = ['window_size', 'rnn_hidden_size', 'decoder_output_bias', 'decoder_output_use_sigmoid', 'baseline_scalar', 'encoder_net', 'decoder_net', 'predict_net', 'embed_net', 'bl_predict_net', 'non_linearity', 'pos_prior_mean', 'pos_prior_sd', 'scale_prior_mean', 'scale_prior_sd'] model_args = {key: getattr(args, key) for key in model_arg_keys if key in args} air = AIR( num_steps=args.model_steps, x_size=50, use_masking=not args.no_masking, use_baselines=not args.no_baselines, z_what_size=args.encoder_latent_size, use_cuda=args.cuda, **model_args ) if args.verbose: print(air) print(args) if 'load' in args: print('Loading parameters...') air.load_state_dict(torch.load(args.load)) vis = visdom.Visdom(env=args.visdom_env) # Viz sample from prior. if args.viz: z, x = air.prior(5, z_pres_prior_p=partial(z_pres_prior_p, 0)) vis.images(draw_many(x, tensor_to_objs(latents_to_tensor(z)))) def per_param_optim_args(module_name, param_name): lr = args.baseline_learning_rate if 'bl_' in param_name else args.learning_rate return {'lr': lr} svi = SVI(air.model, air.guide, optim.Adam(per_param_optim_args), loss=TraceGraph_ELBO()) # Do inference. t0 = time.time() examples_to_viz = X[5:10] for i in range(1, args.num_steps + 1): loss = svi.step(X, args.batch_size, z_pres_prior_p=partial(z_pres_prior_p, i)) if args.progress_every > 0 and i % args.progress_every == 0: print('i={}, epochs={:.2f}, elapsed={:.2f}, elbo={:.2f}'.format( i, (i * args.batch_size) / X_size, (time.time() - t0) / 3600, loss / X_size)) if args.viz and i % args.viz_every == 0: trace = poutine.trace(air.guide).get_trace(examples_to_viz, None) z, recons = poutine.replay(air.prior, trace=trace)(examples_to_viz.size(0)) z_wheres = tensor_to_objs(latents_to_tensor(z)) # Show data with inferred objection positions. vis.images(draw_many(examples_to_viz, z_wheres)) # Show reconstructions of data. vis.images(draw_many(recons, z_wheres)) if args.eval_every > 0 and i % args.eval_every == 0: # Measure accuracy on subset of training data. acc, counts, error_z, error_ix = count_accuracy(X, true_counts, air, 1000) print('i={}, accuracy={}, counts={}'.format(i, acc, counts.numpy().tolist())) if args.viz and error_ix.size(0) > 0: vis.images(draw_many(X[error_ix[0:5]], tensor_to_objs(error_z[0:5])), opts=dict(caption='errors ({})'.format(i))) if 'save' in args and i % args.save_every == 0: print('Saving parameters...') torch.save(air.state_dict(), args.save)
"beta_q", torch.tensor(15.0), constraint=constraints.positive ) # sample heads_prob from the distribution Beta(alpha_q, beta_q) pyro.sample("heads_prob", dist.Beta(alpha_q, beta_q)) # generate data and set up optimizer data = torch.tensor([1.0] * 20 + [0.0] * 10) optimizer = Adam({"lr": 0.0005, "betas": (0.90, 0.999)}) # setup the inference algorithm svi = SVI(model, guide, optimizer, loss=Trace_ELBO(num_particles=10)) n_steps = 5000 # do gradient steps for step in range(n_steps): loss = svi.step(data) if step % 100 == 0: print(loss) print(pyro.param("alpha_q").item()) print(pyro.param("beta_q").item()) # true posterior mean (20 + 10) / (30 + 20) # estimated posterior mean pyro.param("beta_q") / pyro.param("alpha_q")
def train(self, *, raw_expr, encoded_expr, num_epochs=100, batch_size=32, learning_rate=1e-3, eval_every=10, test_proportion=0.05, use_l1=False, l1_lam=0): seed = 2556 torch.manual_seed(seed) pyro.set_rng_seed(seed) pyro.clear_param_store() logging.info('Validating data ...') assert (raw_expr.shape == encoded_expr.shape) read_depth = raw_expr.sum(-1)[:, np.newaxis] encoded_expr = np.hstack([encoded_expr, np.log(read_depth)]) read_depth = torch.tensor(read_depth).to(self.device) raw_expr = torch.tensor(raw_expr).to(self.device) encoded_expr = torch.tensor(encoded_expr).to(self.device) logging.info('Initializing model ...') self.optimizer = Adam({"lr": 1e-3}) self.loss = TraceMeanField_ELBO() if not use_l1: logging.info('No L1 regularization.') svi = SVI(self.model, self.guide, self.optimizer, loss=self.loss) test_set = np.random.rand(read_depth.shape[0]) < test_proportion train_set = ~test_set logging.info("Training with {} cells, testing with {}.".format( str(train_set.sum()), str(test_set.sum()))) logging.info('Training ...') try: for epoch in range(1, num_epochs + 1): running_loss = 0.0 for batch in self.epoch_batch(raw_expr[train_set], encoded_expr[train_set], read_depth[train_set], batch_size=batch_size): if use_l1: loss = self.custom_step(*batch) else: loss = svi.step(*batch) running_loss += loss / batch_size logging.info('Done epoch {}/{}. Training loss: {:.3e}'.format( str(epoch), str(num_epochs), running_loss)) if (epoch % eval_every == 0 or epoch == num_epochs) and test_set.sum() > 0: test_logp = self.evaluate(raw_expr[test_set], encoded_expr[test_set], read_depth[test_set]) logging.info('Test logp: {:.4e}'.format(test_logp)) except KeyboardInterrupt: logging.error('Interrupted training.') self.summarize_posterior(raw_expr, encoded_expr, read_depth) return self
# Do inference. def per_param_optim_args(module_name, param_name, tags): lr = 1e-3 if 'baseline' in tags else 1e-4 return {'lr': lr} svi = SVI(air.model, air.guide, optim.Adam(per_param_optim_args), loss='ELBO', trace_graph=True) for i in range(1, args.num_steps + 1): loss = svi.step(X, args.batch_size, z_pres_prior_p=partial(z_pres_prior_p, i)) if args.progress_every > 0 and i % args.progress_every == 0: print('i={}, epochs={:.2f}, elapsed={:.2f}, elbo={:.2f}'.format( i, (i * args.batch_size) / X_size, (time.time() - t0) / 3600, loss / X_size)) if args.viz and i % args.viz_every == 0: trace = poutine.trace(air.guide).get_trace(examples_to_viz, None) z, recons = poutine.replay(air.prior, trace)(examples_to_viz.size(0)) z_wheres = post_process_latents(z) # Show data with inferred objection positions. vis.images(draw_many(examples_to_viz, z_wheres))
def main(): # parse command line arguments parser = argparse.ArgumentParser(description="parse args") parser.add_argument('-n', '--num-epochs', default=101, type=int, help='number of training epochs') parser.add_argument('-tf', '--test-frequency', default=5, type=int, help='how often we evaluate the test set') parser.add_argument('-lr', '--learning-rate', default=1.0e-3, type=float, help='learning rate') parser.add_argument('-b1', '--beta1', default=0.95, type=float, help='beta1 adam hyperparameter') parser.add_argument('--cuda', action='store_true', default=False, help='whether to use cuda') parser.add_argument('-visdom', '--visdom_flag', default=False, help='Whether plotting in visdom is desired') parser.add_argument('-i-tsne', '--tsne_iter', default=100, type=int, help='epoch when tsne visualization runs') args = parser.parse_args() # setup MNIST data loaders # train_loader, test_loader train_loader, test_loader = setup_data_loaders(MNIST, use_cuda=args.cuda, batch_size=256) # setup the VAE vae = VAE(use_cuda=args.cuda) # setup the optimizer adam_args = {"lr": args.learning_rate} optimizer = Adam(adam_args) # setup the inference algorithm svi = SVI(vae.model, vae.guide, optimizer, loss="ELBO") # setup visdom for visualization if args.visdom_flag: vis = visdom.Visdom() train_elbo = [] test_elbo = [] # training loop for epoch in range(args.num_epochs): # initialize loss accumulator epoch_loss = 0. # do a training epoch over each mini-batch x returned # by the data loader for _, (x, _) in enumerate(train_loader): # if on GPU put mini-batch into CUDA memory if args.cuda: x = x.cuda() # wrap the mini-batch in a PyTorch Variable x = Variable(x) # do ELBO gradient and accumulate loss epoch_loss += svi.step(x) # report training diagnostics normalizer_train = len(train_loader.dataset) total_epoch_loss_train = epoch_loss / normalizer_train train_elbo.append(total_epoch_loss_train) print("[epoch %03d] average training loss: %.4f" % (epoch, total_epoch_loss_train)) if epoch % args.test_frequency == 0: # initialize loss accumulator test_loss = 0. # compute the loss over the entire test set for i, (x, _) in enumerate(test_loader): # if on GPU put mini-batch into CUDA memory if args.cuda: x = x.cuda() # wrap the mini-batch in a PyTorch Variable x = Variable(x) # compute ELBO estimate and accumulate loss test_loss += svi.evaluate_loss(x) # pick three random test images from the first mini-batch and # visualize how well we're reconstructing them if i == 0: if args.visdom_flag: plot_vae_samples(vae, vis) reco_indices = np.random.randint(0, x.size(0), 3) for index in reco_indices: test_img = x[index, :] reco_img = vae.reconstruct_img(test_img) vis.image(test_img.contiguous().view(28, 28).data.cpu().numpy(), opts={'caption': 'test image'}) vis.image(reco_img.contiguous().view(28, 28).data.cpu().numpy(), opts={'caption': 'reconstructed image'}) # report test diagnostics normalizer_test = len(test_loader.dataset) total_epoch_loss_test = test_loss / normalizer_test test_elbo.append(total_epoch_loss_test) print("[epoch %03d] average test loss: %.4f" % (epoch, total_epoch_loss_test)) if epoch == args.tsne_iter: mnist_test_tsne(vae=vae, test_loader=test_loader) plot_llk(np.array(train_elbo), np.array(test_elbo)) return vae
def _test_vectorized_map_data_in_elbo(self, n_superfluous_top, n_superfluous_bottom, n_steps): pyro.clear_param_store() self.data_tensor = Variable(torch.zeros(9, 2)) for _out in range(self.n_outer): for _in in range(self.n_inner): self.data_tensor[3 * _out + _in, :] = self.data[_out][_in] def model(): mu_latent = pyro.sample( "mu_latent", dist.Normal(self.mu0, torch.pow(self.lam0, -0.5), reparameterized=False)) def obs_inner(i, _i, _x): for k in range(n_superfluous_top): pyro.sample("z_%d_%d" % (i, k), dist.Normal(ng_zeros(4 - i, 1), ng_ones(4 - i, 1), reparameterized=False)) pyro.observe("obs_%d" % i, dist.normal, _x, mu_latent, torch.pow(self.lam, -0.5)) for k in range(n_superfluous_top, n_superfluous_top + n_superfluous_bottom): pyro.sample("z_%d_%d" % (i, k), dist.Normal(ng_zeros(4 - i, 1), ng_ones(4 - i, 1), reparameterized=False)) def obs_outer(i, x): pyro.map_data("map_obs_inner_%d" % i, x, lambda _i, _x: obs_inner(i, _i, _x), batch_size=4 - i) pyro.map_data("map_obs_outer", [self.data_tensor[0:4, :], self.data_tensor[4:7, :], self.data_tensor[7:9, :]], lambda i, x: obs_outer(i, x), batch_size=3) return mu_latent pt_mu_baseline = torch.nn.Linear(1, 1) pt_superfluous_baselines = [] for k in range(n_superfluous_top + n_superfluous_bottom): pt_superfluous_baselines.extend([torch.nn.Linear(2, 4), torch.nn.Linear(2, 3), torch.nn.Linear(2, 2)]) def guide(): mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.094 * torch.ones(2), requires_grad=True)) log_sig_q = pyro.param("log_sig_q", Variable( self.analytic_log_sig_n.data - 0.11 * torch.ones(2), requires_grad=True)) sig_q = torch.exp(log_sig_q) trivial_baseline = pyro.module("mu_baseline", pt_mu_baseline, tags="baseline") baseline_value = trivial_baseline(ng_ones(1)) mu_latent = pyro.sample("mu_latent", dist.Normal(mu_q, sig_q, reparameterized=False), baseline=dict(baseline_value=baseline_value)) def obs_inner(i, _i, _x): for k in range(n_superfluous_top + n_superfluous_bottom): z_baseline = pyro.module("z_baseline_%d_%d" % (i, k), pt_superfluous_baselines[3 * k + i], tags="baseline") baseline_value = z_baseline(mu_latent.detach()).unsqueeze(-1) mean_i = pyro.param("mean_%d_%d" % (i, k), Variable(0.5 * torch.ones(4 - i, 1), requires_grad=True)) pyro.sample("z_%d_%d" % (i, k), dist.Normal(mean_i, ng_ones(4 - i, 1), reparameterized=False), baseline=dict(baseline_value=baseline_value)) def obs_outer(i, x): pyro.map_data("map_obs_inner_%d" % i, x, lambda _i, _x: obs_inner(i, _i, _x), batch_size=4 - i) pyro.map_data("map_obs_outer", [self.data_tensor[0:4, :], self.data_tensor[4:7, :], self.data_tensor[7:9, :]], lambda i, x: obs_outer(i, x), batch_size=3) return mu_latent def per_param_callable(module_name, param_name, tags): if 'baseline' in tags: return {"lr": 0.010, "betas": (0.95, 0.999)} else: return {"lr": 0.0012, "betas": (0.95, 0.999)} adam = optim.Adam(per_param_callable) svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True) for step in range(n_steps): svi.step() mu_error = param_abs_error("mu_q", self.analytic_mu_n) log_sig_error = param_abs_error("log_sig_q", self.analytic_log_sig_n) if n_superfluous_top > 0 or n_superfluous_bottom > 0: superfluous_errors = [] for k in range(n_superfluous_top + n_superfluous_bottom): mean_0_error = torch.sum(torch.pow(pyro.param("mean_0_%d" % k), 2.0)) mean_1_error = torch.sum(torch.pow(pyro.param("mean_1_%d" % k), 2.0)) mean_2_error = torch.sum(torch.pow(pyro.param("mean_2_%d" % k), 2.0)) superfluous_error = torch.max(torch.max(mean_0_error, mean_1_error), mean_2_error) 
superfluous_errors.append(superfluous_error.data.cpu().numpy()[0]) if step % 500 == 0 and self.verbose: print("mu error, log(sigma) error: %.4f, %.4f" % (mu_error, log_sig_error)) if n_superfluous_top > 0 or n_superfluous_bottom > 0: print("superfluous error: %.4f" % np.max(superfluous_errors)) self.assertEqual(0.0, mu_error, prec=0.04) self.assertEqual(0.0, log_sig_error, prec=0.05) if n_superfluous_top > 0 or n_superfluous_bottom > 0: self.assertEqual(0.0, np.max(superfluous_errors), prec=0.04)
class SVILossCompute(LossCompute): """A simple loss compute and train function.""" def __init__(self, generator, model, guide, optimizer, optim_params, elbo_type='TraceELBO', num_particles=1, eval=False, step=1. / 30000.0, aux_model=None, aux_guide=None): optim = self.getOptimizer(optimizer, optim_params) elbo = self.getELBO(elbo_type, num_particles) criterion = SVI(model, guide, optim, loss=elbo) super(SVILossCompute, self).__init__(generator, criterion, optim) self.eval = eval self.guide = guide self.model = model self.kl_anneal = step self.step = step self.aux_criterion = None #hack to get only KL term self.model_no_obs = poutine.block(model, hide=["preds", 'lm_preds']) optim = self.getOptimizer(optimizer, optim_params) elbo = self.getELBO(elbo_type, num_particles) self.kl_eval_svi = SVI(self.model_no_obs, self.guide, optim, elbo) #aux model and guide are for calculating additional loss terms... if aux_model is not None and aux_guide is not None: print('setting aux loss, ') logging.info("setting aux loss") optim = self.getOptimizer(optimizer, optim_params) elbo = self.getELBO(elbo_type, num_particles) self.aux_criterion = SVI(aux_model, aux_guide, optim, loss=elbo) self.aux_guide = aux_guide self.aux_model = aux_model def setKLAnnealingSchedule(self, step_size, kl_anneal): """ step_size: how much to increase weight of KL term at each step beta: current weight of kl term """ self.step = step_size self.kl_anneal = kl_anneal def getKLAnnealingSchedule(self): return self.step, self.kl_anneal def getOptimizerStateDict(self): return self.criterion.optim.get_state() def setOptimizerStateDict(self, state_dict): return self.criterion.optim.set_state(state_dict) def getELBO(self, elbo_type, particles): if elbo_type == 'TraceELBO': return Trace_ELBO(num_particles=particles) elif elbo_type == "MeanFieldELBO": return TraceMeanField_ELBO(num_particles=particles) else: raise ValueError("{} ELBO not supported".format(elbo_type)) def getOptimizer(self, optimizer, optim_params): if optimizer == 'clippedadam': return PyroOptim(ClippedAdam, optim_params) elif optimizer == 'adadelta': #not 100% on this but pretty sure ** "dereferences" the dictionary return Adadelta(optim_params) elif optimizer == 'clippedadadelta': #since it's custom, gotta set it up in the way Pyro expects return PyroOptim(ClippedAdadelta, optim_params) else: raise ValueError("{} optimizer not supported".format(optimizer)) def __call__(self, src, trg, src_mask, trg_mask, src_lengths, trg_lengths, trg_y, norm): #x = self.generator(x) kl_anneal = self.kl_anneal if self.eval: #you could also do .eval_loss or something but this allows a bit more probing of results with torch.no_grad(): elbo = self.criterion.evaluate_loss( src, trg, src_mask, trg_mask, src_lengths, trg_lengths, trg_y) * norm kl_term = self.kl_eval_svi.evaluate_loss( src, trg, src_mask, trg_mask, src_lengths, trg_lengths, trg_y) * norm nll = elbo - kl_term def torch_item(x): return x if isinstance(x, numbers.Number) else x.item() if self.aux_criterion is not None: aux_loss = self.aux_criterion.evaluate_loss( src, trg, src_mask, trg_mask, src_lengths, trg_lengths, trg_y) else: aux_loss = -1.0 loss = { 'elbo': elbo, 'nll': nll, 'approx_kl': kl_term, 'aux_loss': aux_loss } else: loss = self.criterion.step(src, trg, src_mask, trg_mask, src_lengths, trg_lengths, trg_y, kl_anneal) if self.aux_criterion is not None: aux_loss = self.aux_criterion.step(src, trg, src_mask, trg_mask, src_lengths, trg_lengths, trg_y, kl_anneal) loss = loss * norm self.kl_anneal = min(self.kl_anneal + 
self.step, 1.0) return loss
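# A minimal sketch (not part of the original SVILossCompute class) of how a kl_anneal
# factor like the one passed to self.criterion.step(...) above is typically consumed
# inside a Pyro model/guide: the latent sample site is wrapped in poutine.scale so the
# KL term is down-weighted early in training. The site name "z" and the single-argument
# signature are illustrative assumptions, not the original model.
import torch
import pyro
import pyro.distributions as dist
from pyro import poutine


def annealed_model(data, annealing_factor=1.0):
    # scale only the latent site; the likelihood keeps full weight
    with poutine.scale(scale=annealing_factor):
        z = pyro.sample("z", dist.Normal(0.0, 1.0))
    pyro.sample("obs", dist.Normal(z, 1.0), obs=data)


def annealed_guide(data, annealing_factor=1.0):
    loc = pyro.param("z_loc", torch.tensor(0.0))
    # scale the matching guide site so model and guide KL terms are weighted consistently
    with poutine.scale(scale=annealing_factor):
        pyro.sample("z", dist.Normal(loc, 1.0))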
def main(args): """ Train GAE """ print("Using {} dataset".format(args.dataset_str)) # Load data np.random.seed(1) adj, features = load_data(args.dataset_str) N, D = features.shape # Store original adjacency matrix (without diagonal entries) adj_orig = adj adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) # Some preprocessing adj_train_norm = preprocess_graph(adj_train) adj_train_norm = Variable(make_sparse(adj_train_norm)) adj_train_labels = Variable( torch.FloatTensor(adj_train + sp.eye(adj_train.shape[0]).todense())) features = Variable(make_sparse(features)) n_edges = adj_train_labels.sum() data = { 'adj_norm': adj_train_norm, 'adj_labels': adj_train_labels, 'features': features, } gae = GAE(data, n_hidden=32, n_latent=16, dropout=args.dropout, subsampling=args.subsampling) optimizer = Adam({"lr": args.lr, "betas": (0.95, 0.999)}) svi = SVI(gae.model, gae.guide, optimizer, loss="ELBO") # Results results = defaultdict(list) # Full batch training loop for epoch in range(args.num_epochs): # initialize loss accumulator epoch_loss = 0. # do ELBO gradient and accumulate loss epoch_loss += svi.step() # report training diagnostics if args.subsampling: normalized_loss = epoch_loss / float(2 * n_edges) else: normalized_loss = epoch_loss / (2 * N * N) results['train_elbo'].append(normalized_loss) # Training loss emb = gae.get_embeddings() accuracy, roc_curr, ap_curr = eval_gae(val_edges, val_edges_false, emb, adj_orig) results['accuracy_train'].append(accuracy) results['roc_train'].append(roc_curr) results['ap_train'].append(ap_curr) print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(normalized_loss), "train_acc=", "{:.5f}".format(accuracy), "val_roc=", "{:.5f}".format(roc_curr), "val_ap=", "{:.5f}".format(ap_curr)) # Test loss if epoch % args.test_freq == 0: emb = gae.get_embeddings() accuracy, roc_score, ap_score = eval_gae(test_edges, test_edges_false, emb, adj_orig) results['accuracy_test'].append(accuracy) results['roc_test'].append(roc_score) results['ap_test'].append(ap_score) print("Optimization Finished!") # Test loss emb = gae.get_embeddings() accuracy, roc_score, ap_score = eval_gae(test_edges, test_edges_false, emb, adj_orig) print('Test Accuracy: ' + str(accuracy)) print('Test ROC score: ' + str(roc_score)) print('Test AP score: ' + str(ap_score)) # Plot plot_results(results, args.test_freq, path=args.dataset_str + "_results.png")
from pyro.infer import Trace_ELBO svi = SVI(model, guide, optimizer, loss=Trace_ELBO()) #, num_particles=7) n_steps = 10 * 400000 # do gradient steps for step in range(1, n_steps): if step % 100 == 1: print("DOING A STEP") print(".......") print(step) # quit() # for name in pyro.get_param_store().get_all_param_names(): # print([name, pyro.param(name).data.numpy()]) svi.step(corpus) if step % 2000 == 0: print("Saving") save_path = "../raw-results/" #save_path = "/afs/cs.stanford.edu/u/mhahn/scr/deps/" with open( "output/" + args.language + "_" + __file__ + "_model_" + str(myID) + ".tsv", "w") as outFile: print("\t".join( list( map(str, [ "Counter", "Document", "DH_Mean_NoPunct", "DH_Sigma_NoPunct", "Distance_Mean_NoPunct", "Distance_Sigma_NoPunct", "Dependency" ]))), file=outFile)
# Initialize the SVI optimization class my_svi = SVI(model=model_gamma, guide=my_guide, optim=ClippedAdam({"lr": 0.01, 'clip_norm': 1.0}), loss=Trace_ELBO()) losses = [] start_time = time.time() # Perform optimization for i in range(5000): loss = my_svi.step(X_train_torch, y_train_torch, california.feature_names) normalized_loss = loss / X_train_torch.shape[0] # Tabulate the loss for plotting losses.append(normalized_loss) if i % 250 == 0: print(f'iter: {i}, normalized loss: {round(normalized_loss, 2)}')
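# A small follow-up sketch (not in the original cell) for visualizing the normalized
# losses tabulated above; assumes matplotlib is available.
import matplotlib.pyplot as plt

plt.figure()
plt.plot(losses)
plt.xlabel("SVI iteration")
plt.ylabel("normalized ELBO loss")
plt.title("SVI training loss")
plt.show()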
for epoch in range(30): print("Start epoch!") # initialize loss accumulator epoch_loss = 0. # do a training epoch over each mini-batch x # returned by the data loader for convo_i in range(dataset.size()): x, y = dataset.next_batch() # HACK for overfitting # y = [100, 30, 11, 1, 0, 24, 8, 4, 17, 11, 1, 6, 0, 9, 4, 8, 6, 24, 9, 1, 101] x = dataset.to_onehot(x, long_type=False) y = dataset.to_onehot(y, long_type=False) # do ELBO gradient and accumulate loss if USE_CUDA: loss = svi.step(x.cuda(), y.cuda(), convo_i) else: loss = svi.step(x, y, convo_i) epoch_loss += loss # print loss if convo_i % 10 == 0: print("Epoch: {}, Step: {}, ELBO loss: {}".format(epoch, convo_i, loss)) print("---------------------------\n") print("\n\nTrained epoch: {}, epoch loss: {}\n\n".format(epoch, epoch_loss))
lifted_module = pyro.random_module("module", net, priors) return lifted_module() optim = Adam({"lr": 0.01}) svi = SVI(model, guide, optim, loss=Trace_ELBO()) num_iterations = 1000 loss = 0 for j in range(num_iterations): loss = 0 for batch_id, data in enumerate(X_train): # calculate the loss and take a gradient step loss += svi.step(data[0].view(-1, 1), data[1]) normalizer_train = len(train_loader.dataset) total_epoch_loss_train = loss / normalizer_train print("Epoch ", j, " Loss ", total_epoch_loss_train) num_samples = 10 def predict(x): sampled_models = [guide(None, None) for _ in range(num_samples)] yhats = [sampled_model(x).data for sampled_model in sampled_models] mean = torch.mean(torch.stack(yhats), 0) return torch.argmax(mean, dim=1)
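# A hedged usage sketch (not in the original) of the predict() helper above: estimate
# accuracy by averaging predictions over several guide samples. X_test is an assumed
# iterable of (input, label) tensor pairs shaped like X_train.
correct, total = 0, 0
for x_batch, y_batch in X_test:
    preds = predict(x_batch.view(-1, 1))
    correct += (preds == y_batch).sum().item()
    total += y_batch.numel()
print("Posterior-predictive accuracy: {:.3f}".format(correct / total))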
svae = PhoneVAE(batch_size=1) optimizer = Adam(ADAM_CONFIG) svi = SVI(svae.model, svae.guide, optimizer, loss=Trace_ELBO()) """ Train the model """ train_elbo = [] for e in range(NUM_EPOCHS): epoch_loss = 0. for string in TEST_STRINGS: # Pad input string differently than observed string so program doesn't get rewarded by making string short one_hot_string = strings_to_tensor([string], MAX_STRING_LEN) if CUDA: one_hot_string = one_hot_string.cuda() # take a single gradient step per string and accumulate the loss epoch_loss += svi.step(one_hot_string) if e % RECORD_EVERY == 0: avg_epoch_loss = epoch_loss / len(TEST_STRINGS) print(f"Epoch #{e} Average Loss: {avg_epoch_loss}") train_elbo.append(avg_epoch_loss) epoch_loss = 0 plt.plot(train_elbo) plt.title("ELBO") plt.xlabel("step") plt.ylabel("loss") plt.savefig(f"result/{SESSION_NAME}.png")
def main(args): # Fix random number seed pyro.util.set_rng_seed(args.seed) # Enable optional validation warnings pyro.enable_validation(True) # Load and pre-process data dataloader, num_genes, l_mean, l_scale, anndata = get_data(dataset=args.dataset, batch_size=args.batch_size, cuda=args.cuda) # Instantiate instance of model/guide and various neural networks scanvi = SCANVI(num_genes=num_genes, num_labels=4, l_loc=l_mean, l_scale=l_scale, scale_factor=1.0 / (args.batch_size * num_genes)) if args.cuda: scanvi.cuda() # Setup an optimizer (Adam) and learning rate scheduler. # By default we start with a moderately high learning rate (0.005) # and reduce by a factor of 5 after 20 epochs. scheduler = MultiStepLR({'optimizer': Adam, 'optim_args': {'lr': args.learning_rate}, 'milestones': [20], 'gamma': 0.2}) # Tell Pyro to enumerate out y when y is unobserved guide = config_enumerate(scanvi.guide, "parallel", expand=True) # Setup a variational objective for gradient-based learning. # Note we use TraceEnum_ELBO in order to leverage Pyro's machinery # for automatic enumeration of the discrete latent variable y. elbo = TraceEnum_ELBO(strict_enumeration_warning=False) svi = SVI(scanvi.model, guide, scheduler, elbo) # Training loop for epoch in range(args.num_epochs): losses = [] for x, y in dataloader: if y is not None: y = y.type_as(x) loss = svi.step(x, y) losses.append(loss) # Tell the scheduler we've done one epoch. scheduler.step() print("[Epoch %04d] Loss: %.5f" % (epoch, np.mean(losses))) # Put neural networks in eval mode (needed for batchnorm) scanvi.eval() # Now that we're done training we'll inspect the latent representations we've learned if args.plot and args.dataset == 'pbmc': import scanpy as sc # Compute latent representation (z2_loc) for each cell in the dataset latent_rep = scanvi.z2l_encoder(dataloader.data_x)[0] # Compute inferred cell type probabilities for each cell y_logits = scanvi.classifier(latent_rep) y_probs = softmax(y_logits, dim=-1).data.cpu().numpy() # Use scanpy to compute 2-dimensional UMAP coordinates using our # learned 10-dimensional latent representation z2 anndata.obsm["X_scANVI"] = latent_rep.data.cpu().numpy() sc.pp.neighbors(anndata, use_rep="X_scANVI") sc.tl.umap(anndata) umap1, umap2 = anndata.obsm['X_umap'][:, 0], anndata.obsm['X_umap'][:, 1] # Construct plots; all plots are scatterplots depicting the two-dimensional UMAP embedding # and only differ in how points are colored # The topmost plot depicts the 200 hand-curated seed labels in our dataset fig, axes = plt.subplots(3, 2) seed_marker_sizes = anndata.obs['seed_marker_sizes'] axes[0, 0].scatter(umap1, umap2, s=seed_marker_sizes, c=anndata.obs['seed_colors'], marker='.', alpha=0.7) axes[0, 0].set_title('Hand-Curated Seed Labels') patch1 = Patch(color='lightcoral', label='CD8-Naive') patch2 = Patch(color='limegreen', label='CD4-Naive') patch3 = Patch(color='deepskyblue', label='CD4-Memory') patch4 = Patch(color='mediumorchid', label='CD4-Regulatory') axes[0, 1].legend(loc='center left', handles=[patch1, patch2, patch3, patch4]) axes[0, 1].get_xaxis().set_visible(False) axes[0, 1].get_yaxis().set_visible(False) axes[0, 1].set_frame_on(False) # The remaining plots depict the inferred cell type probability for each of the four cell types s10 = axes[1, 0].scatter(umap1, umap2, s=1, c=y_probs[:, 0], marker='.', alpha=0.7) axes[1, 0].set_title('Inferred CD8-Naive probability') fig.colorbar(s10, ax=axes[1, 0]) s11 = axes[1, 1].scatter(umap1, umap2, s=1, c=y_probs[:, 1], marker='.', alpha=0.7) axes[1, 
1].set_title('Inferred CD4-Naive probability') fig.colorbar(s11, ax=axes[1, 1]) s20 = axes[2, 0].scatter(umap1, umap2, s=1, c=y_probs[:, 2], marker='.', alpha=0.7) axes[2, 0].set_title('Inferred CD4-Memory probability') fig.colorbar(s20, ax=axes[2, 0]) s21 = axes[2, 1].scatter(umap1, umap2, s=1, c=y_probs[:, 3], marker='.', alpha=0.7) axes[2, 1].set_title('Inferred CD4-Regulatory probability') fig.colorbar(s21, ax=axes[2, 1]) fig.tight_layout() plt.savefig('scanvi.pdf')
def main(args): # Init tensorboard writer = SummaryWriter('./runs/' + args.runname + str(args.trialnumber)) model_name = 'VanillaDMM' # Set evaluation log file evaluation_logpath = './logs/{}/evaluation_result.log'.format( model_name.lower()) log_evaluation(evaluation_logpath, 'Evaluation Trial - {}\n'.format(args.trialnumber)) # Constants time_length = 30 input_length_for_pred = 20 pred_length = time_length - input_length_for_pred train_batch_size = 16 valid_batch_size = 1 # For model input_channels = 1 z_channels = 50 emission_channels = [64, 32] transition_channels = 64 encoder_channels = [32, 64] rnn_input_dim = 256 rnn_channels = 128 kernel_size = 3 pred_length = 0 # Device checking use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if use_cuda else "cpu") # Make dataset logging.info("Generate data") train_datapath = args.datapath / 'train' valid_datapath = args.datapath / 'valid' train_dataset = DiffusionDataset(train_datapath) valid_dataset = DiffusionDataset(valid_datapath) # Create data loaders from pickle data logging.info("Generate data loaders") train_dataloader = DataLoader( train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8) valid_dataloader = DataLoader( valid_dataset, batch_size=valid_batch_size, num_workers=4) # Training parameters width = 100 height = 100 input_dim = width * height # Create model logging.warning("Generate model") logging.warning(input_dim) pred_input_dim = 10 dmm = DMM(input_channels=input_channels, z_channels=z_channels, emission_channels=emission_channels, transition_channels=transition_channels, encoder_channels=encoder_channels, rnn_input_dim=rnn_input_dim, rnn_channels=rnn_channels, kernel_size=kernel_size, height=height, width=width, pred_input_dim=pred_input_dim, num_layers=1, rnn_dropout_rate=0.0, num_iafs=0, iaf_dim=50, use_cuda=use_cuda) # Initialize model logging.info("Initialize model") epochs = args.endepoch learning_rate = 0.0001 beta1 = 0.9 beta2 = 0.999 clip_norm = 10.0 lr_decay = 1.0 weight_decay = 0 adam_params = {"lr": learning_rate, "betas": (beta1, beta2), "clip_norm": clip_norm, "lrd": lr_decay, "weight_decay": weight_decay} adam = ClippedAdam(adam_params) elbo = Trace_ELBO() svi = SVI(dmm.model, dmm.guide, adam, loss=elbo) # saves the model and optimizer states to disk save_model = Path('./checkpoints/' + model_name) def save_checkpoint(epoch): save_dir = save_model / '{}.model'.format(epoch) save_opt_dir = save_model / '{}.opt'.format(epoch) logging.info("saving model to %s..." % save_dir) torch.save(dmm.state_dict(), save_dir) logging.info("saving optimizer states to %s..." % save_opt_dir) adam.save(save_opt_dir) logging.info("done saving model and optimizer checkpoints to disk.") # Starting epoch start_epoch = args.startepoch # loads the model and optimizer states from disk if start_epoch != 0: load_opt = './checkpoints/' + model_name + \ '/e{}-i188-opt-tn{}.opt'.format(start_epoch - 1, args.trialnumber) load_model = './checkpoints/' + model_name + \ '/e{}-i188-tn{}.pt'.format(start_epoch - 1, args.trialnumber) def load_checkpoint(): # assert exists(load_opt) and exists(load_model), \ # "--load-model and/or --load-opt misspecified" logging.info("loading model from %s..." % load_model) dmm.load_state_dict(torch.load(load_model, map_location=device)) # logging.info("loading optimizer states from %s..." % load_opt) # adam.load(load_opt) # logging.info("done loading model and optimizer states.") if load_model != '': logging.info('Load checkpoint') load_checkpoint() # Validation only? 
validation_only = args.validonly # Train the model if not validation_only: logging.info("Training model") annealing_epochs = 1000 minimum_annealing_factor = 0.2 N_train_size = 3000 N_mini_batches = int(N_train_size / train_batch_size + int(N_train_size % train_batch_size > 0)) for epoch in tqdm(range(start_epoch, epochs), desc='Epoch', leave=True): r_loss_train = 0 dmm.train(True) idx = 0 mov_avg_loss = 0 mov_data_len = 0 for which_mini_batch, data in enumerate(tqdm(train_dataloader, desc='Train', leave=True)): if annealing_epochs > 0 and epoch < annealing_epochs: # compute the KL annealing factor appropriate for the current mini-batch in the current epoch min_af = minimum_annealing_factor annealing_factor = min_af + (1.0 - min_af) * \ (float(which_mini_batch + epoch * N_mini_batches + 1) / float(annealing_epochs * N_mini_batches)) else: # by default the KL annealing factor is unity annealing_factor = 1.0 data['observation'] = normalize( data['observation'].unsqueeze(2).to(device)) batch_size, length, _, w, h = data['observation'].shape data_reversed = reverse_sequences(data['observation']) data_mask = torch.ones( batch_size, length, input_channels, w, h).to(device) loss = svi.step(data['observation'], data_reversed, data_mask, annealing_factor) # Running losses mov_avg_loss += loss mov_data_len += batch_size r_loss_train += loss idx += 1 # Average losses train_loss_avg = r_loss_train / (len(train_dataset) * time_length) writer.add_scalar('Loss/train', train_loss_avg, epoch) logging.info("Epoch: %d, Training loss: %1.5f", epoch, train_loss_avg) # Evaluation at the final epoch if epoch == epochs - 1: for temp_pred_length in [20]: r_loss_valid = 0 r_loss_loc_valid = 0 r_loss_scale_valid = 0 r_loss_latent_valid = 0 dmm.train(False) val_pred_length = temp_pred_length val_pred_input_length = 10 with torch.no_grad(): for i, data in enumerate(tqdm(valid_dataloader, desc='Eval', leave=True)): data['observation'] = normalize( data['observation'].unsqueeze(2).to(device)) batch_size, length, _, w, h = data['observation'].shape data_reversed = reverse_sequences( data['observation']) data_mask = torch.ones( batch_size, length, input_channels, w, h).to(device) pred_tensor = data['observation'][:, :input_length_for_pred, :, :, :] pred_tensor_reversed = reverse_sequences( pred_tensor) pred_tensor_mask = torch.ones( batch_size, input_length_for_pred, input_channels, w, h).to(device) ground_truth = data['observation'][:, input_length_for_pred:, :, :, :] val_nll = svi.evaluate_loss( data['observation'], data_reversed, data_mask) preds, _, loss_loc, loss_scale = do_prediction_rep_inference( dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation']) ground_truth = denormalize( data['observation'].squeeze().cpu().detach() ) pred_with_input = denormalize( torch.cat( [data['observation'][:, :-val_pred_length, :, :, :].squeeze(), preds.squeeze()], dim=0 ).cpu().detach() ) # Running losses r_loss_valid += val_nll r_loss_loc_valid += loss_loc r_loss_scale_valid += loss_scale # Average losses valid_loss_avg = r_loss_valid / \ (len(valid_dataset) * time_length) valid_loss_loc_avg = r_loss_loc_valid / \ (len(valid_dataset) * val_pred_length * width * height) valid_loss_scale_avg = r_loss_scale_valid / \ (len(valid_dataset) * val_pred_length * width * height) writer.add_scalar('Loss/test', valid_loss_avg, epoch) writer.add_scalar( 'Loss/test_obs', valid_loss_loc_avg, epoch) writer.add_scalar('Loss/test_scale', valid_loss_scale_avg, epoch) logging.info("Validation loss: %1.5f", valid_loss_avg) 
logging.info("Validation obs loss: %1.5f", valid_loss_loc_avg) logging.info("Validation scale loss: %1.5f", valid_loss_scale_avg) log_evaluation(evaluation_logpath, "Validation obs loss for {}s pred {}: {}\n".format( val_pred_length, args.trialnumber, valid_loss_loc_avg)) log_evaluation(evaluation_logpath, "Validation scale loss for {}s pred {}: {}\n".format( val_pred_length, args.trialnumber, valid_loss_scale_avg)) # Save model if epoch % 50 == 0 or epoch == epochs - 1: torch.save(dmm.state_dict(), args.modelsavepath / model_name / 'e{}-i{}-tn{}.pt'.format(epoch, idx, args.trialnumber)) adam.save(args.modelsavepath / model_name / 'e{}-i{}-opt-tn{}.opt'.format(epoch, idx, args.trialnumber)) # Last validation after training test_samples_indices = range(100) total_n = 0 if validation_only: r_loss_loc_valid = 0 r_loss_scale_valid = 0 r_loss_latent_valid = 0 dmm.train(False) val_pred_length = args.validpredlength val_pred_input_length = 10 with torch.no_grad(): for i in tqdm(test_samples_indices, desc='Valid', leave=True): # Data processing data = valid_dataset[i] if torch.isnan(torch.sum(data['observation'])): print("Skip {}".format(i)) continue else: total_n += 1 data['observation'] = normalize( data['observation'].unsqueeze(0).unsqueeze(2).to(device)) batch_size, length, _, w, h = data['observation'].shape data_reversed = reverse_sequences(data['observation']) data_mask = torch.ones( batch_size, length, input_channels, w, h).to(device) # Prediction pred_tensor_mask = torch.ones( batch_size, input_length_for_pred, input_channels, w, h).to(device) preds, _, loss_loc, loss_scale = do_prediction_rep_inference( dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation']) ground_truth = denormalize( data['observation'].squeeze().cpu().detach() ) pred_with_input = denormalize( torch.cat( [data['observation'][:, :-val_pred_length, :, :, :].squeeze(), preds.squeeze()], dim=0 ).cpu().detach() ) # Save samples if i < 5: save_dir_samples = Path('./samples/more_variance_long') with open(save_dir_samples / '{}-gt-test.pkl'.format(i), 'wb') as fout: pickle.dump(ground_truth, fout) with open(save_dir_samples / '{}-vanilladmm-pred-test.pkl'.format(i), 'wb') as fout: pickle.dump(pred_with_input, fout) # Running losses r_loss_loc_valid += loss_loc r_loss_scale_valid += loss_scale r_loss_latent_valid += np.sum((preds.squeeze().detach().cpu().numpy( ) - data['latent'][time_length - val_pred_length:, :, :].detach().cpu().numpy()) ** 2) # Average losses test_samples_indices = range(total_n) print(total_n) valid_loss_loc_avg = r_loss_loc_valid / \ (total_n * val_pred_length * width * height) valid_loss_scale_avg = r_loss_scale_valid / \ (total_n * val_pred_length * width * height) valid_loss_latent_avg = r_loss_latent_valid / \ (total_n * val_pred_length * width * height) logging.info("Validation obs loss for %ds pred VanillaDMM: %f", val_pred_length, valid_loss_loc_avg) logging.info("Validation latent loss: %f", valid_loss_latent_avg) with open('VanillaDMMResult.log', 'a+') as fout: validation_log = 'Pred {}s VanillaDMM: {}\n'.format( val_pred_length, valid_loss_loc_avg) fout.write(validation_log)
def test_elbo_mapdata(map_type, batch_size, n_steps, lr): # normal-normal: known covariance lam0 = torch.tensor([0.1, 0.1]) # precision of prior loc0 = torch.tensor([0.0, 0.5]) # prior mean # known precision of observation noise lam = torch.tensor([6.0, 4.0]) data = [] sum_data = torch.zeros(2) def add_data_point(x, y): data.append(torch.tensor([x, y])) sum_data.data.add_(data[-1].data) add_data_point(0.1, 0.21) add_data_point(0.16, 0.11) add_data_point(0.06, 0.31) add_data_point(-0.01, 0.07) add_data_point(0.23, 0.25) add_data_point(0.19, 0.18) add_data_point(0.09, 0.41) add_data_point(-0.04, 0.17) data = torch.stack(data) n_data = torch.tensor([float(len(data))]) analytic_lam_n = lam0 + n_data.expand_as(lam) * lam analytic_log_sig_n = -0.5 * torch.log(analytic_lam_n) analytic_loc_n = sum_data * (lam / analytic_lam_n) + loc0 * ( lam0 / analytic_lam_n) logger.debug("DOING ELBO TEST [bs = {}, map_type = {}]".format( batch_size, map_type)) pyro.clear_param_store() def model(): loc_latent = pyro.sample( "loc_latent", dist.Normal(loc0, torch.pow(lam0, -0.5)).to_event(1)) if map_type == "iplate": for i in pyro.plate("aaa", len(data), batch_size): pyro.sample( "obs_%d" % i, dist.Normal(loc_latent, torch.pow(lam, -0.5)).to_event(1), obs=data[i], ), elif map_type == "plate": with pyro.plate("aaa", len(data), batch_size) as ind: pyro.sample( "obs", dist.Normal(loc_latent, torch.pow(lam, -0.5)).to_event(1), obs=data[ind], ), else: for i, x in enumerate(data): pyro.sample( "obs_%d" % i, dist.Normal(loc_latent, torch.pow(lam, -0.5)).to_event(1), obs=x, ) return loc_latent def guide(): loc_q = pyro.param( "loc_q", analytic_loc_n.detach().clone() + torch.tensor([-0.18, 0.23])) log_sig_q = pyro.param( "log_sig_q", analytic_log_sig_n.detach().clone() - torch.tensor([-0.18, 0.23]), ) sig_q = torch.exp(log_sig_q) pyro.sample("loc_latent", dist.Normal(loc_q, sig_q).to_event(1)) if map_type == "iplate" or map_type is None: for i in pyro.plate("aaa", len(data), batch_size): pass elif map_type == "plate": # dummy plate to do subsampling for observe with pyro.plate("aaa", len(data), batch_size): pass else: pass adam = optim.Adam({"lr": lr}) svi = SVI(model, guide, adam, loss=TraceGraph_ELBO()) for k in range(n_steps): svi.step() loc_error = torch.sum( torch.pow(analytic_loc_n - pyro.param("loc_q"), 2.0)) log_sig_error = torch.sum( torch.pow(analytic_log_sig_n - pyro.param("log_sig_q"), 2.0)) if k % 500 == 0: logger.debug("errors - {}, {}".format(loc_error, log_sig_error)) assert_equal(loc_error.item(), 0, prec=0.05) assert_equal(log_sig_error.item(), 0, prec=0.06)
def test_nested_list_map_data_in_elbo(self, n_steps=4000): pyro.clear_param_store() def model(): mu_latent = pyro.sample( "mu_latent", dist.Normal(self.mu0, torch.pow(self.lam0, -0.5), reparameterized=False)) def obs_outer(i, x): pyro.map_data("map_obs_inner_%d" % i, x, lambda _i, _x: obs_inner(i, _i, _x), batch_size=3) def obs_inner(i, _i, _x): pyro.observe("obs_%d_%d" % (i, _i), dist.normal, _x, mu_latent, torch.pow(self.lam, -0.5)) pyro.map_data("map_obs_outer", self.data, lambda i, x: obs_outer(i, x), batch_size=3) return mu_latent def guide(): mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.234 * torch.ones(2), requires_grad=True)) log_sig_q = pyro.param("log_sig_q", Variable( self.analytic_log_sig_n.data - 0.27 * torch.ones(2), requires_grad=True)) sig_q = torch.exp(log_sig_q) mu_latent = pyro.sample( "mu_latent", dist.Normal(mu_q, sig_q, reparameterized=False), baseline=dict(use_decaying_avg_baseline=True)) def obs_outer(i, x): pyro.map_data("map_obs_inner_%d" % i, x, lambda _i, _x: None, batch_size=3) pyro.map_data("map_obs_outer", self.data, lambda i, x: obs_outer(i, x), batch_size=3) return mu_latent guide_trace = pyro.poutine.trace(guide, graph_type="dense").get_trace() model_trace = pyro.poutine.trace(pyro.poutine.replay(model, guide_trace), graph_type="dense").get_trace() assert len(model_trace.edges()) == 27 assert len(model_trace.nodes()) == 16 assert len(guide_trace.edges()) == 0 assert len(guide_trace.nodes()) == 9 adam = optim.Adam({"lr": 0.0008, "betas": (0.96, 0.999)}) svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True) for k in range(n_steps): svi.step() mu_error = param_mse("mu_q", self.analytic_mu_n) log_sig_error = param_mse("log_sig_q", self.analytic_log_sig_n) if k % 500 == 0 and self.verbose: print("mu error, log(sigma) error: %.4f, %.4f" % (mu_error, log_sig_error)) self.assertEqual(0.0, mu_error, prec=0.04) self.assertEqual(0.0, log_sig_error, prec=0.04)
[["mean", "std", "5%", "25%", "50%", "75%", "95%"]] return site_stats # Prepare training data train = torch.tensor(df.values, dtype=torch.float) svi = SVI(model, guide, optim.Adam({"lr": .005}), loss=Trace_ELBO(), num_samples=1000) x_data, y_data = train[:, :-1], train[:, 2] pyro.clear_param_store() num_iters = 8000 if not smoke_test else 2 for i in range(num_iters): elbo = svi.step(x_data, y_data) if i % 500 == 0: logging.info("Elbo loss: {}".format(elbo)) posterior = svi.run(x_data, y_data) sites = ["a", "bA", "bR", "bAR", "sigma"] for site, values in summary(posterior, sites).items(): print("Site: {}".format(site)) print(values, "\n") def wrapped_model(x_data, y_data): pyro.sample("prediction", dist.Delta(model(x_data, y_data)))
# setup visdom for visualization if args.visdom_flag: vis = visdom.Visdom() train_elbo = [] test_elbo = [] # training loop for epoch in range(args.num_epochs): # initialize loss accumulator epoch_loss = 0. # do a training epoch over each mini-batch x returned # by the data loader for x, _ in train_loader: # do ELBO gradient and accumulate loss epoch_loss += svi.step(x) # report training diagnostics normalizer_train = len(train_loader.dataset) total_epoch_loss_train = epoch_loss / normalizer_train train_elbo.append(total_epoch_loss_train) print("[epoch %03d] average training loss: %.4f" % (epoch, total_epoch_loss_train)) torch.save(vae.state_dict(), args.model_file) # if epoch % args.test_frequency == 0: # # initialize loss accumulator # test_loss = 0. # # compute the loss over the entire test set # for i, (x, _) in enumerate(test_loader): # # if on GPU put mini-batch into CUDA memory
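# A hedged sketch (not part of the original) of how the commented-out test-set
# evaluation above might be completed; it assumes test_loader, svi, and an optional
# use_cuda flag follow the same conventions as the training loop.
def evaluate(svi, test_loader, use_cuda=False):
    # average ELBO loss over the test set, without taking gradient steps
    test_loss = 0.
    for x, _ in test_loader:
        if use_cuda:
            x = x.cuda()
        test_loss += svi.evaluate_loss(x)
    return test_loss / len(test_loader.dataset)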