def assert_ok(model, guide, elbo):
    """
    Assert that inference works without warnings or errors.
    """
    pyro.clear_param_store()
    inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo)
    inference.step()
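
# A minimal usage sketch (the toy model/guide pair below is hypothetical, not
# from the surrounding tests; it assumes pyro, dist, Adam and Trace_ELBO are
# imported as elsewhere in this file): any parameterless model/guide with
# matching sample sites can be smoke-tested this way.
def _toy_model():
    pyro.sample("w", dist.Normal(0.0, 1.0))

def _toy_guide():
    loc = pyro.param("w_loc", torch.tensor(0.0))
    pyro.sample("w", dist.Normal(loc, 1.0))

assert_ok(_toy_model, _toy_guide, Trace_ELBO())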
def test_quantiles(auto_class, Elbo):
    def model():
        pyro.sample("x", dist.Normal(0.0, 1.0))
        pyro.sample("y", dist.LogNormal(0.0, 1.0))
        pyro.sample("z", dist.Beta(2.0, 2.0))

    guide = auto_class(model)
    infer = SVI(model, guide, Adam({'lr': 0.01}), Elbo(strict_enumeration_warning=False))
    for _ in range(100):
        infer.step()

    quantiles = guide.quantiles([0.1, 0.5, 0.9])
    median = guide.median()
    for name in ["x", "y", "z"]:
        assert_equal(median[name], quantiles[name][1])
    quantiles = {name: [v.item() for v in value] for name, value in quantiles.items()}

    assert -3.0 < quantiles["x"][0]
    assert quantiles["x"][0] + 1.0 < quantiles["x"][1]
    assert quantiles["x"][1] + 1.0 < quantiles["x"][2]
    assert quantiles["x"][2] < 3.0

    assert 0.01 < quantiles["y"][0]
    assert quantiles["y"][0] * 2.0 < quantiles["y"][1]
    assert quantiles["y"][1] * 2.0 < quantiles["y"][2]
    assert quantiles["y"][2] < 100.0

    assert 0.01 < quantiles["z"][0]
    assert quantiles["z"][0] + 0.1 < quantiles["z"][1]
    assert quantiles["z"][1] + 0.1 < quantiles["z"][2]
    assert quantiles["z"][2] < 0.99
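
# Why the "y" checks above are multiplicative while the "x" checks are additive
# (a sketch, not part of the test): LogNormal(0, 1) is exp(Normal(0, 1)), so
# its quantiles are the exponentials of the Normal quantiles, and successive
# quantiles differ by a factor rather than an offset.
import torch.distributions as td
q = td.Normal(0.0, 1.0).icdf(torch.tensor([0.1, 0.5, 0.9]))
print(q)        # ~ tensor([-1.2816, 0.0000, 1.2816])
print(q.exp())  # ~ tensor([ 0.2776, 1.0000, 3.6022])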
def test_dirichlet_bernoulli(Elbo, vectorized):
    pyro.clear_param_store()
    data = torch.tensor([1.0] * 6 + [0.0] * 4)

    def model1(data):
        concentration0 = torch.tensor([10.0, 10.0])
        f = pyro.sample("latent_fairness", dist.Dirichlet(concentration0))[1]
        for i in pyro.irange("irange", len(data)):
            pyro.sample("obs_{}".format(i), dist.Bernoulli(f), obs=data[i])

    def model2(data):
        concentration0 = torch.tensor([10.0, 10.0])
        f = pyro.sample("latent_fairness", dist.Dirichlet(concentration0))[1]
        pyro.sample("obs",
                    dist.Bernoulli(f).expand_by(data.shape).independent(1),
                    obs=data)

    model = model2 if vectorized else model1

    def guide(data):
        concentration_q = pyro.param("concentration_q", torch.tensor([15.0, 15.0]),
                                     constraint=constraints.positive)
        pyro.sample("latent_fairness", dist.Dirichlet(concentration_q))

    elbo = Elbo(num_particles=7, strict_enumeration_warning=False)
    optim = Adam({"lr": 0.0005, "betas": (0.90, 0.999)})
    svi = SVI(model, guide, optim, elbo)
    for step in range(40):
        svi.step(data)
def test_dynamic_lr(scheduler, num_steps):
    pyro.clear_param_store()

    def model():
        sample = pyro.sample('latent', Normal(torch.tensor(0.), torch.tensor(0.3)))
        return pyro.sample('obs', Normal(sample, torch.tensor(0.2)), obs=torch.tensor(0.1))

    def guide():
        loc = pyro.param('loc', torch.tensor(0.))
        scale = pyro.param('scale', torch.tensor(0.5))
        pyro.sample('latent', Normal(loc, scale))

    svi = SVI(model, guide, scheduler, loss=TraceGraph_ELBO())
    for epoch in range(2):
        scheduler.set_epoch(epoch)
        for _ in range(num_steps):
            svi.step()
        if epoch == 1:
            loc = pyro.param('loc')
            scale = pyro.param('scale')
            opt = scheduler.optim_objs[loc].optimizer
            assert opt.state_dict()['param_groups'][0]['lr'] == 0.02
            assert opt.state_dict()['param_groups'][0]['initial_lr'] == 0.01
            opt = scheduler.optim_objs[scale].optimizer
            assert opt.state_dict()['param_groups'][0]['lr'] == 0.02
            assert opt.state_dict()['param_groups'][0]['initial_lr'] == 0.01
def test_svi_step_guide_uses_grad(enumerate1):
    data = torch.tensor([0., 1., 3.])

    @poutine.broadcast
    def model():
        scale = pyro.param("scale")
        loc = pyro.sample("loc", dist.Normal(0., 10.))
        with pyro.iarange("data", len(data)):
            pyro.sample("obs", dist.Normal(loc, scale), obs=data)
        pyro.sample("b", dist.Bernoulli(0.5))

    @config_enumerate(default=enumerate1)
    def guide():
        p = pyro.param("p", torch.tensor(0.5), constraint=constraints.unit_interval)
        scale = pyro.param("scale", torch.tensor(1.0), constraint=constraints.positive)
        var = pyro.param("var", torch.tensor(1.0), constraint=constraints.positive)

        x = torch.tensor(0., requires_grad=True)
        prior = dist.Normal(0., 10.).log_prob(x)
        likelihood = dist.Normal(x, scale).log_prob(data).sum()
        loss = -(prior + likelihood)
        g = grad(loss, [x], create_graph=True)[0]
        H = grad(g, [x], create_graph=True)[0]
        loc = x.detach() - g / H  # newton step
        pyro.sample("loc", dist.Normal(loc, var))
        pyro.sample("b", dist.Bernoulli(p))

    elbo = TraceEnum_ELBO(max_iarange_nesting=1,
                          strict_enumeration_warning=any([enumerate1]))
    inference = SVI(model, guide, pyro.optim.Adam({}), elbo)
    inference.step()
def main(args):
    pyro.set_rng_seed(0)
    pyro.enable_validation()
    optim = Adam({"lr": 0.1})
    inference = SVI(model, guide, optim, loss=Trace_ELBO())

    # Data is an arbitrary json-like structure with tensors at leaves.
    one = torch.tensor(1.0)
    data = {
        "foo": one,
        "bar": [0 * one, 1 * one, 2 * one],
        "baz": {
            "noun": {
                "concrete": 4 * one,
                "abstract": 6 * one,
            },
            "verb": 2 * one,
        },
    }

    print('Step\tLoss')
    loss = 0.0
    for step in range(args.num_epochs):
        loss += inference.step(data)
        if step and step % 10 == 0:
            print('{}\t{:0.5g}'.format(step, loss))
            loss = 0.0

    print('Parameters:')
    for name in sorted(pyro.get_param_store().get_all_param_names()):
        print('{} = {}'.format(name, pyro.param(name).detach().cpu().numpy()))
def assert_error(model, guide, elbo):
    """
    Assert that inference fails with an error.
    """
    pyro.clear_param_store()
    inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo)
    with pytest.raises((NotImplementedError, UserWarning, KeyError, ValueError, RuntimeError)):
        inference.step()
def test_svi_step_smoke(model, guide, enum_discrete, trace_graph):
    pyro.clear_param_store()
    data = Variable(torch.Tensor([0, 1, 9]))

    optimizer = pyro.optim.Adam({"lr": .001})
    inference = SVI(model, guide, optimizer, loss="ELBO",
                    trace_graph=trace_graph, enum_discrete=enum_discrete)
    with xfail_if_not_implemented():
        inference.step(data)
def do_elbo_test(self, reparameterized, n_steps, lr, prec, beta1,
                 difficulty=1.0, model_permutation=False):
    n_repa_nodes = torch.sum(self.which_nodes_reparam) if not reparameterized \
        else len(self.q_topo_sort)
    logger.info((" - - - DO GAUSSIAN %d-LAYERED PYRAMID ELBO TEST " +
                 "(with a total of %d RVs) [reparameterized=%s; %d/%d; perm=%s] - - -") %
                (self.N, (2 ** self.N) - 1, reparameterized, n_repa_nodes,
                 len(self.q_topo_sort), model_permutation))
    pyro.clear_param_store()

    # check graph structure is as expected but only for N=2
    if self.N == 2:
        guide_trace = pyro.poutine.trace(self.guide, graph_type="dense").get_trace(
            reparameterized=reparameterized,
            model_permutation=model_permutation,
            difficulty=difficulty)
        expected_nodes = set(['log_sig_1R', 'kappa_1_1L', '_INPUT', 'constant_term_loc_latent_1R',
                              '_RETURN', 'loc_latent_1R', 'loc_latent_1', 'constant_term_loc_latent_1',
                              'loc_latent_1L', 'constant_term_loc_latent_1L', 'log_sig_1L',
                              'kappa_1_1R', 'kappa_1R_1L', 'log_sig_1'])
        expected_edges = set([('loc_latent_1R', 'loc_latent_1'),
                              ('loc_latent_1L', 'loc_latent_1R'),
                              ('loc_latent_1L', 'loc_latent_1')])
        assert expected_nodes == set(guide_trace.nodes)
        assert expected_edges == set(guide_trace.edges)

    adam = optim.Adam({"lr": lr, "betas": (beta1, 0.999)})
    svi = SVI(self.model, self.guide, adam, loss=TraceGraph_ELBO())

    for step in range(n_steps):
        t0 = time.time()
        svi.step(reparameterized=reparameterized,
                 model_permutation=model_permutation, difficulty=difficulty)

        if step % 5000 == 0 or step == n_steps - 1:
            log_sig_errors = []
            for node in self.target_lambdas:
                target_log_sig = -0.5 * torch.log(self.target_lambdas[node])
                log_sig_error = param_mse('log_sig_' + node, target_log_sig)
                log_sig_errors.append(log_sig_error)
            max_log_sig_error = np.max(log_sig_errors)
            min_log_sig_error = np.min(log_sig_errors)
            mean_log_sig_error = np.mean(log_sig_errors)
            leftmost_node = self.q_topo_sort[0]
            leftmost_constant_error = param_mse('constant_term_' + leftmost_node,
                                                self.target_leftmost_constant)
            almost_leftmost_constant_error = param_mse('constant_term_' + leftmost_node[:-1] + 'R',
                                                       self.target_almost_leftmost_constant)

            logger.debug("[mean function constant errors (partial)] %.4f %.4f" %
                         (leftmost_constant_error, almost_leftmost_constant_error))
            logger.debug("[min/mean/max log(scale) errors] %.4f %.4f %.4f" %
                         (min_log_sig_error, mean_log_sig_error, max_log_sig_error))
            logger.debug("[step time = %.3f; N = %d; step = %d]\n" %
                         (time.time() - t0, self.N, step))

    assert_equal(0.0, max_log_sig_error, prec=prec)
    assert_equal(0.0, leftmost_constant_error, prec=prec)
    assert_equal(0.0, almost_leftmost_constant_error, prec=prec)
def test_svi_step_smoke(model, guide, enumerate1):
    pyro.clear_param_store()
    data = torch.tensor([0.0, 1.0, 9.0])

    guide = config_enumerate(guide, default=enumerate1)
    optimizer = pyro.optim.Adam({"lr": .001})
    elbo = TraceEnum_ELBO(max_iarange_nesting=1,
                          strict_enumeration_warning=any([enumerate1]))
    inference = SVI(model, guide, optimizer, loss=elbo)
    inference.step(data)
def assert_warning(model, guide, elbo):
    """
    Assert that inference works but with a warning.
    """
    pyro.clear_param_store()
    inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo)
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        inference.step()
        assert len(w), 'No warnings were raised'
        for warning in w:
            logger.info(warning)
def test_svi(Elbo, num_particles):
    pyro.clear_param_store()
    data = torch.arange(10)

    def model(data):
        loc = pyro.param("loc", torch.tensor(0.0))
        scale = pyro.param("scale", torch.tensor(1.0), constraint=constraints.positive)
        pyro.sample("x", dist.Normal(loc, scale).expand_by(data.shape).independent(1), obs=data)

    def guide(data):
        pass

    elbo = Elbo(num_particles=num_particles, strict_enumeration_warning=False)
    inference = SVI(model, guide, Adam({"lr": 1e-6}), elbo)
    for i in range(100):
        inference.step(data)
def do_elbo_test(self, reparameterized, n_steps, lr, prec, difficulty=1.0):
    n_repa_nodes = torch.sum(self.which_nodes_reparam) if not reparameterized else self.N
    logger.info(" - - - - - DO GAUSSIAN %d-CHAIN ELBO TEST [reparameterized = %s; %d/%d] - - - - - " %
                (self.N, reparameterized, n_repa_nodes, self.N))
    if self.N < 0:
        def array_to_string(y):
            return str(map(lambda x: "%.3f" % x.detach().cpu().numpy()[0], y))

        logger.debug("lambdas: " + array_to_string(self.lambdas))
        logger.debug("target_mus: " + array_to_string(self.target_mus[1:]))
        logger.debug("target_kappas: " + array_to_string(self.target_kappas[1:]))
        logger.debug("lambda_posts: " + array_to_string(self.lambda_posts[1:]))
        logger.debug("lambda_tilde_posts: " + array_to_string(self.lambda_tilde_posts))
    pyro.clear_param_store()

    adam = optim.Adam({"lr": lr, "betas": (0.95, 0.999)})
    elbo = TraceGraph_ELBO()
    loss_and_grads = elbo.loss_and_grads
    # loss_and_grads = elbo.jit_loss_and_grads  # This fails.
    svi = SVI(self.model, self.guide, adam, loss=elbo.loss, loss_and_grads=loss_and_grads)

    for step in range(n_steps):
        t0 = time.time()
        svi.step(reparameterized=reparameterized, difficulty=difficulty)

        if step % 5000 == 0 or step == n_steps - 1:
            kappa_errors, log_sig_errors, loc_errors = [], [], []
            for k in range(1, self.N + 1):
                if k != self.N:
                    kappa_error = param_mse("kappa_q_%d" % k, self.target_kappas[k])
                    kappa_errors.append(kappa_error)
                loc_errors.append(param_mse("loc_q_%d" % k, self.target_mus[k]))
                log_sig_error = param_mse("log_sig_q_%d" % k,
                                          -0.5 * torch.log(self.lambda_posts[k]))
                log_sig_errors.append(log_sig_error)

            max_errors = (np.max(loc_errors), np.max(log_sig_errors), np.max(kappa_errors))
            min_errors = (np.min(loc_errors), np.min(log_sig_errors), np.min(kappa_errors))
            mean_errors = (np.mean(loc_errors), np.mean(log_sig_errors), np.mean(kappa_errors))
            logger.debug("[max errors]  (loc, log_scale, kappa) = (%.4f, %.4f, %.4f)" % max_errors)
            logger.debug("[min errors]  (loc, log_scale, kappa) = (%.4f, %.4f, %.4f)" % min_errors)
            logger.debug("[mean errors] (loc, log_scale, kappa) = (%.4f, %.4f, %.4f)" % mean_errors)
            logger.debug("[step time = %.3f; N = %d; step = %d]\n" % (time.time() - t0, self.N, step))

    assert_equal(0.0, max_errors[0], prec=prec)
    assert_equal(0.0, max_errors[1], prec=prec)
    assert_equal(0.0, max_errors[2], prec=prec)
def test_inference_deepGP():
    gp1 = GPRegression(X, None, kernel, name="GPR1")
    Z, _ = gp1.model()
    gp2 = VariationalSparseGP(Z, y2D, Matern32(input_dim=3), Z.clone(),
                              likelihood, name="GPR2")

    def model():
        Z, _ = gp1.model()
        gp2.set_data(Z, y2D)
        gp2.model()

    def guide():
        gp1.guide()
        gp2.guide()

    svi = SVI(model, guide, optim.Adam({}), Trace_ELBO())
    svi.step()
def test_irange_smoke(auto_class, Elbo):
    def model():
        x = pyro.sample("x", dist.Normal(0, 1))
        assert x.shape == ()

        for i in pyro.irange("irange", 3):
            y = pyro.sample("y_{}".format(i),
                            dist.Normal(0, 1).expand_by([2, 1 + i, 2]).independent(3))
            assert y.shape == (2, 1 + i, 2)

        z = pyro.sample("z", dist.Normal(0, 1).expand_by([2]).independent(1))
        assert z.shape == (2,)

        pyro.sample("obs", dist.Bernoulli(0.1), obs=torch.tensor(0))

    guide = auto_class(model)
    infer = SVI(model, guide, Adam({"lr": 1e-6}), Elbo(strict_enumeration_warning=False))
    infer.step()
def test_iarange(Elbo, reparameterized):
    pyro.clear_param_store()
    data = torch.tensor([-0.5, 2.0])
    num_particles = 20000
    precision = 0.06
    Normal = dist.Normal if reparameterized else fakes.NonreparameterizedNormal

    @poutine.broadcast
    def model():
        particles_iarange = pyro.iarange("particles", num_particles, dim=-2)
        data_iarange = pyro.iarange("data", len(data), dim=-1)

        pyro.sample("nuisance_a", Normal(0, 1))
        with particles_iarange, data_iarange:
            z = pyro.sample("z", Normal(0, 1))
        pyro.sample("nuisance_b", Normal(2, 3))
        with data_iarange, particles_iarange:
            pyro.sample("x", Normal(z, 1), obs=data)
        pyro.sample("nuisance_c", Normal(4, 5))

    @poutine.broadcast
    def guide():
        loc = pyro.param("loc", torch.zeros(len(data)))
        scale = pyro.param("scale", torch.tensor([1.]))

        pyro.sample("nuisance_c", Normal(4, 5))
        with pyro.iarange("particles", num_particles, dim=-2):
            with pyro.iarange("data", len(data), dim=-1):
                pyro.sample("z", Normal(loc, scale))
        pyro.sample("nuisance_b", Normal(2, 3))
        pyro.sample("nuisance_a", Normal(0, 1))

    optim = Adam({"lr": 0.1})
    inference = SVI(model, guide, optim, loss=Elbo(strict_enumeration_warning=False))
    inference.loss_and_grads(model, guide)
    params = dict(pyro.get_param_store().named_parameters())
    actual_grads = {name: param.grad.detach().cpu().numpy() / num_particles
                    for name, param in params.items()}

    expected_grads = {'loc': np.array([0.5, -2.0]), 'scale': np.array([2.0])}
    for name in sorted(params):
        logger.info('expected {} = {}'.format(name, expected_grads[name]))
        logger.info('actual {} = {}'.format(name, actual_grads[name]))
    assert_equal(actual_grads, expected_grads, prec=precision)
def test_median(auto_class, Elbo):
    def model():
        pyro.sample("x", dist.Normal(0.0, 1.0))
        pyro.sample("y", dist.LogNormal(0.0, 1.0))
        pyro.sample("z", dist.Beta(2.0, 2.0))

    guide = auto_class(model)
    infer = SVI(model, guide, Adam({'lr': 0.05}), Elbo(strict_enumeration_warning=False))
    for _ in range(100):
        infer.step()

    median = guide.median()
    assert_equal(median["x"], torch.tensor(0.0), prec=0.1)
    if auto_class is AutoDelta:
        assert_equal(median["y"], torch.tensor(-1.0).exp(), prec=0.1)
    else:
        assert_equal(median["y"], torch.tensor(1.0), prec=0.1)
    assert_equal(median["z"], torch.tensor(0.5), prec=0.1)
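
# Why the AutoDelta branch expects exp(-1) (a sketch, not part of the test):
# AutoDelta collapses the posterior to a point estimate at the mode, and the
# mode of LogNormal(mu, sigma) is exp(mu - sigma^2) = exp(-1) here, whereas
# its median is exp(mu) = 1, which the other autoguides recover.
mu, sigma = 0.0, 1.0
print(torch.tensor(mu - sigma ** 2).exp())  # mode   ~ 0.3679
print(torch.tensor(mu).exp())               # median = 1.0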
def initialize(data):
    pyro.clear_param_store()
    optim = pyro.optim.Adam({'lr': 0.1, 'betas': [0.8, 0.99]})
    elbo = TraceEnum_ELBO(max_iarange_nesting=1)
    svi = SVI(model, full_guide, optim, loss=elbo)

    # Initialize weights to uniform.
    pyro.param('auto_weights', 0.5 * torch.ones(K), constraint=constraints.simplex)

    # Assume half of the data variance is due to intra-component noise.
    var = (data.var() / 2).sqrt()
    pyro.param('auto_scale', torch.tensor([var] * 4), constraint=constraints.positive)

    # Initialize means from a subsample of data.
    pyro.param('auto_locs', data[torch.multinomial(torch.ones(len(data)) / len(data), K)])

    loss = svi.loss(model, full_guide, data)
    return loss, svi
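
# A hedged usage sketch (the seed loop is an assumption, not shown above):
# mixture models are sensitive to initialization, so one common pattern is to
# call initialize() under several seeds and keep the seed with the lowest
# initial loss before running full SVI.
best_loss, best_seed = float('inf'), None
for seed in range(10):
    pyro.set_rng_seed(seed)
    loss, svi = initialize(data)
    if loss < best_loss:
        best_loss, best_seed = loss, seed
pyro.set_rng_seed(best_seed)
loss, svi = initialize(data)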
def main(args):
    # load data
    print('loading training data...')
    if not os.path.exists('faces_training.csv'):
        wget.download('https://d2fefpcigoriu7.cloudfront.net/datasets/faces_training.csv',
                      'faces_training.csv')
    data = torch.tensor(np.loadtxt('faces_training.csv', delimiter=',')).float()

    sparse_gamma_def = SparseGammaDEF()
    opt = optim.AdagradRMSProp({"eta": 4.5, "t": 0.1})
    svi = SVI(sparse_gamma_def.model, sparse_gamma_def.guide, opt, loss=Trace_ELBO())

    print('\nbeginning training...')

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        sparse_gamma_def.clip_params()  # we clip params after each gradient step

        if k % 20 == 0 and k > 0:
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
def main(args):
    pyro.set_rng_seed(0)
    pyro.enable_validation()
    optim = Adam({"lr": 0.1})
    inference = SVI(model, guide, optim, loss=Trace_ELBO())
    data = torch.tensor([0.0, 1.0, 2.0, 20.0, 30.0, 40.0])
    k = 2

    print('Step\tLoss')
    loss = 0.0
    for step in range(args.num_epochs):
        if step and step % 10 == 0:
            print('{}\t{:0.5g}'.format(step, loss))
            loss = 0.0
        loss += inference.step(data, k)

    print('Parameters:')
    for name in sorted(pyro.get_param_store().get_all_param_names()):
        print('{} = {}'.format(name, pyro.param(name).detach().cpu().numpy()))
def test_subsample_gradient(trace_graph, reparameterized):
    pyro.clear_param_store()
    data_size = 2
    subsample_size = 1
    num_particles = 1000
    precision = 0.333
    data = dist.normal(ng_zeros(data_size), ng_ones(data_size))

    def model(subsample_size):
        with pyro.iarange("data", len(data), subsample_size) as ind:
            x = data[ind]
            z = pyro.sample("z", dist.Normal(ng_zeros(len(x)), ng_ones(len(x)),
                                             reparameterized=reparameterized))
            pyro.observe("x", dist.Normal(z, ng_ones(len(x)), reparameterized=reparameterized), x)

    def guide(subsample_size):
        mu = pyro.param("mu", lambda: Variable(torch.zeros(len(data)), requires_grad=True))
        sigma = pyro.param("sigma", lambda: Variable(torch.ones(1), requires_grad=True))
        with pyro.iarange("data", len(data), subsample_size) as ind:
            mu = mu[ind]
            sigma = sigma.expand(subsample_size)
            pyro.sample("z", dist.Normal(mu, sigma, reparameterized=reparameterized))

    optim = Adam({"lr": 0.1})
    inference = SVI(model, guide, optim, loss="ELBO",
                    trace_graph=trace_graph, num_particles=num_particles)

    # Compute gradients without subsampling.
    inference.loss_and_grads(model, guide, subsample_size=data_size)
    params = dict(pyro.get_param_store().named_parameters())
    expected_grads = {name: param.grad.data.clone() for name, param in params.items()}
    zero_grads(params.values())

    # Compute gradients with subsampling.
    inference.loss_and_grads(model, guide, subsample_size=subsample_size)
    actual_grads = {name: param.grad.data.clone() for name, param in params.items()}

    for name in sorted(params):
        print('\nexpected {} = {}'.format(name, expected_grads[name].cpu().numpy()))
        print('actual {} = {}'.format(name, actual_grads[name].cpu().numpy()))
    assert_equal(actual_grads, expected_grads, prec=precision)
def test_elbo_nonreparameterized(self):
    if self.verbose:
        print(" - - - - - DO POISSON-GAMMA ELBO TEST - - - - - ")
    pyro.clear_param_store()

    def model():
        lambda_latent = pyro.sample("lambda_latent", dist.gamma, self.alpha0, self.beta0)
        for i, x in enumerate(self.data):
            pyro.observe("obs_{}".format(i), dist.poisson, x, lambda_latent)
        return lambda_latent

    def guide():
        alpha_q_log = pyro.param("alpha_q_log",
                                 Variable(self.log_alpha_n.data + 0.17, requires_grad=True))
        beta_q_log = pyro.param("beta_q_log",
                                Variable(self.log_beta_n.data - 0.143, requires_grad=True))
        alpha_q, beta_q = torch.exp(alpha_q_log), torch.exp(beta_q_log)
        pyro.sample("lambda_latent", dist.gamma, alpha_q, beta_q,
                    baseline=dict(use_decaying_avg_baseline=True))

    adam = optim.Adam({"lr": .0007, "betas": (0.95, 0.999)})
    svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)

    for k in range(7000):
        svi.step()
        alpha_error = param_abs_error("alpha_q_log", self.log_alpha_n)
        beta_error = param_abs_error("beta_q_log", self.log_beta_n)
        if k % 500 == 0 and self.verbose:
            print("alpha_q_log_error, beta_q_log_error: %.4f, %.4f" % (alpha_error, beta_error))

    self.assertEqual(0.0, alpha_error, prec=0.08)
    self.assertEqual(0.0, beta_error, prec=0.08)
def test_subsample_gradient(Elbo, reparameterized, subsample):
    pyro.clear_param_store()
    data = torch.tensor([-0.5, 2.0])
    subsample_size = 1 if subsample else len(data)
    num_particles = 50000
    precision = 0.06
    Normal = dist.Normal if reparameterized else fakes.NonreparameterizedNormal

    def model(subsample):
        with pyro.iarange("particles", num_particles):
            with pyro.iarange("data", len(data), subsample_size, subsample) as ind:
                x = data[ind].unsqueeze(-1).expand(-1, num_particles)
                z = pyro.sample("z", Normal(0, 1).expand_by(x.shape))
                pyro.sample("x", Normal(z, 1), obs=x)

    def guide(subsample):
        loc = pyro.param("loc", lambda: torch.zeros(len(data), requires_grad=True))
        scale = pyro.param("scale", lambda: torch.tensor([1.0], requires_grad=True))
        with pyro.iarange("particles", num_particles):
            with pyro.iarange("data", len(data), subsample_size, subsample) as ind:
                loc_ind = loc[ind].unsqueeze(-1).expand(-1, num_particles)
                pyro.sample("z", Normal(loc_ind, scale))

    optim = Adam({"lr": 0.1})
    elbo = Elbo(strict_enumeration_warning=False)
    inference = SVI(model, guide, optim, loss=elbo)
    if subsample_size == 1:
        inference.loss_and_grads(model, guide, subsample=torch.LongTensor([0]))
        inference.loss_and_grads(model, guide, subsample=torch.LongTensor([1]))
    else:
        inference.loss_and_grads(model, guide, subsample=torch.LongTensor([0, 1]))
    params = dict(pyro.get_param_store().named_parameters())
    normalizer = 2 * num_particles / subsample_size
    actual_grads = {name: param.grad.detach().cpu().numpy() / normalizer
                    for name, param in params.items()}

    expected_grads = {'loc': np.array([0.5, -2.0]), 'scale': np.array([2.0])}
    for name in sorted(params):
        logger.info('expected {} = {}'.format(name, expected_grads[name]))
        logger.info('actual {} = {}'.format(name, actual_grads[name]))
    assert_equal(actual_grads, expected_grads, prec=precision)
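
# Where expected_grads comes from (a sketch, not part of the test): with
# q(z_i) = Normal(loc_i, scale), prior Normal(0, 1) and likelihood
# Normal(z_i, 1), the per-datapoint negative-ELBO gradients at the
# initialization loc = 0, scale = 1 work out to
#
#   d(-ELBO)/d loc_i = 2 * loc_i - x_i       = -x_i -> [0.5, -2.0]
#   d(-ELBO)/d scale = 2 * scale - 1 / scale = 1    -> 2.0 over both data points
#
# so after dividing the accumulated gradient by the particles-times-data
# normalizer above, the Monte Carlo estimate should recover these values.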
def test_elbo_with_transformed_distribution(self):
    if self.verbose:
        print(" - - - - - DO LOGNORMAL-NORMAL ELBO TEST [uses TransformedDistribution] - - - - - ")
    pyro.clear_param_store()

    def model():
        mu_latent = pyro.sample("mu_latent", dist.normal, self.mu0, torch.pow(self.tau0, -0.5))
        bijector = AffineExp(torch.pow(self.tau, -0.5), mu_latent)
        x_dist = TransformedDistribution(dist.normal, bijector)
        pyro.observe("obs0", x_dist, self.data[0], ng_zeros(1), ng_ones(1))
        pyro.observe("obs1", x_dist, self.data[1], ng_zeros(1), ng_ones(1))
        return mu_latent

    def guide():
        mu_q_log = pyro.param("mu_q_log",
                              Variable(self.log_mu_n.data + 0.17, requires_grad=True))
        tau_q_log = pyro.param("tau_q_log",
                               Variable(self.log_tau_n.data - 0.143, requires_grad=True))
        mu_q, tau_q = torch.exp(mu_q_log), torch.exp(tau_q_log)
        pyro.sample("mu_latent", dist.normal, mu_q, torch.pow(tau_q, -0.5))

    adam = optim.Adam({"lr": 0.001, "betas": (0.95, 0.999)})
    svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)

    for k in range(7000):
        svi.step()
        mu_error = param_abs_error("mu_q_log", self.log_mu_n)
        tau_error = param_abs_error("tau_q_log", self.log_tau_n)
        if k % 500 == 0 and self.verbose:
            print("mu_error, tau_error = %.4f, %.4f" % (mu_error, tau_error))

    self.assertEqual(0.0, mu_error, prec=0.05)
    self.assertEqual(0.0, tau_error, prec=0.05)
def do_elbo_test(self, reparameterized, n_steps):
    if self.verbose:
        print(" - - - - - DO NORMALNORMAL ELBO TEST [reparameterized = %s] - - - - - " % reparameterized)
    pyro.clear_param_store()

    def model():
        mu_latent = pyro.sample("mu_latent",
                                dist.Normal(self.mu0, torch.pow(self.lam0, -0.5),
                                            reparameterized=reparameterized))
        for i, x in enumerate(self.data):
            pyro.observe("obs_%d" % i, dist.normal, x, mu_latent, torch.pow(self.lam, -0.5))
        return mu_latent

    def guide():
        mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.334 * torch.ones(2),
                                           requires_grad=True))
        log_sig_q = pyro.param("log_sig_q",
                               Variable(self.analytic_log_sig_n.data - 0.29 * torch.ones(2),
                                        requires_grad=True))
        sig_q = torch.exp(log_sig_q)
        mu_latent = pyro.sample("mu_latent",
                                dist.Normal(mu_q, sig_q, reparameterized=reparameterized),
                                baseline=dict(use_decaying_avg_baseline=True))
        return mu_latent

    adam = optim.Adam({"lr": .0015, "betas": (0.97, 0.999)})
    svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)

    for k in range(n_steps):
        svi.step()
        mu_error = param_mse("mu_q", self.analytic_mu_n)
        log_sig_error = param_mse("log_sig_q", self.analytic_log_sig_n)
        if k % 250 == 0 and self.verbose:
            print("mu error, log(sigma) error: %.4f, %.4f" % (mu_error, log_sig_error))

    self.assertEqual(0.0, mu_error, prec=0.03)
    self.assertEqual(0.0, log_sig_error, prec=0.03)
def do_test_per_param_optim(self, fixed_param, free_param):
    pyro.clear_param_store()

    def model():
        prior_dist = Normal(self.mu0, torch.pow(self.lam0, -0.5))
        mu_latent = pyro.sample("mu_latent", prior_dist)
        x_dist = Normal(mu_latent, torch.pow(self.lam, -0.5))
        pyro.observe("obs", x_dist, self.data)
        return mu_latent

    def guide():
        mu_q = pyro.param("mu_q", Variable(torch.zeros(1), requires_grad=True))
        log_sig_q = pyro.param("log_sig_q", Variable(torch.zeros(1), requires_grad=True))
        sig_q = torch.exp(log_sig_q)
        pyro.sample("mu_latent", Normal(mu_q, sig_q))

    def optim_params(module_name, param_name, tags):
        if param_name == fixed_param:
            return {'lr': 0.00}
        elif param_name == free_param:
            return {'lr': 0.01}

    adam = optim.Adam(optim_params)
    adam2 = optim.Adam(optim_params)
    svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)
    svi2 = SVI(model, guide, adam2, loss="ELBO", trace_graph=True)

    svi.step()
    adam_initial_step_count = list(adam.get_state()['mu_q']['state'].items())[0][1]['step']
    adam.save('adam.unittest.save')
    svi.step()
    adam_final_step_count = list(adam.get_state()['mu_q']['state'].items())[0][1]['step']
    adam2.load('adam.unittest.save')
    svi2.step()
    adam2_step_count_after_load_and_step = list(adam2.get_state()['mu_q']['state'].items())[0][1]['step']

    assert adam_initial_step_count == 1
    assert adam_final_step_count == 2
    assert adam2_step_count_after_load_and_step == 2

    free_param_unchanged = torch.equal(pyro.param(free_param).data, torch.zeros(1))
    fixed_param_unchanged = torch.equal(pyro.param(fixed_param).data, torch.zeros(1))
    assert fixed_param_unchanged and not free_param_unchanged
def optimize(self, optimizer=None, loss=None, num_steps=1000):
    """
    A convenient method to optimize parameters for the Gaussian Process model
    using :class:`~pyro.infer.svi.SVI`.

    :param PyroOptim optimizer: A Pyro optimizer.
    :param ELBO loss: A Pyro loss instance.
    :param int num_steps: Number of steps to run SVI.
    :returns: a list of losses during the training procedure
    :rtype: list
    """
    if optimizer is None:
        optimizer = Adam({})
    if not isinstance(optimizer, PyroOptim):
        raise ValueError("Optimizer should be an instance of "
                         "pyro.optim.PyroOptim class.")
    if loss is None:
        loss = Trace_ELBO()
    svi = SVI(self.model, self.guide, optimizer, loss=loss)
    losses = []
    for i in range(num_steps):
        losses.append(svi.step())
    return losses
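
# A hedged usage sketch (X, y, kernel and the GPRegression class are
# placeholders, not defined here): the returned per-step losses make it easy
# to eyeball convergence.
#
# gpr = GPRegression(X, y, kernel)
# losses = gpr.optimize(optimizer=Adam({"lr": 0.01}), num_steps=2000)
# plt.plot(losses)  # the curve should flatten once the GP has converged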
def do_elbo_test(self, reparameterized, n_steps, beta1, lr):
    if self.verbose:
        print(" - - - - - DO LOGNORMAL-NORMAL ELBO TEST [repa = %s] - - - - - " % reparameterized)
    pyro.clear_param_store()
    pt_guide = LogNormalNormalGuide(self.log_mu_n.data + 0.17,
                                    self.log_tau_n.data - 0.143)

    def model():
        mu_latent = pyro.sample("mu_latent", dist.normal, self.mu0, torch.pow(self.tau0, -0.5))
        sigma = torch.pow(self.tau, -0.5)
        pyro.observe("obs0", dist.lognormal, self.data[0], mu_latent, sigma)
        pyro.observe("obs1", dist.lognormal, self.data[1], mu_latent, sigma)
        return mu_latent

    def guide():
        pyro.module("mymodule", pt_guide)
        mu_q, tau_q = torch.exp(pt_guide.mu_q_log), torch.exp(pt_guide.tau_q_log)
        sigma = torch.pow(tau_q, -0.5)
        pyro.sample("mu_latent", dist.Normal(mu_q, sigma, reparameterized=reparameterized),
                    baseline=dict(use_decaying_avg_baseline=True))

    adam = optim.Adam({"lr": lr, "betas": (beta1, 0.999)})
    svi = SVI(model, guide, adam, loss="ELBO", trace_graph=True)

    for k in range(n_steps):
        svi.step()
        mu_error = param_abs_error("mymodule$$$mu_q_log", self.log_mu_n)
        tau_error = param_abs_error("mymodule$$$tau_q_log", self.log_tau_n)
        if k % 500 == 0 and self.verbose:
            print("mu_error, tau_error = %.4f, %.4f" % (mu_error, tau_error))

    self.assertEqual(0.0, mu_error, prec=0.05)
    self.assertEqual(0.0, tau_error, prec=0.05)
            if printHere:
                print("BACKWARD 3 " + __file__ + " " + language + " " +
                      str(myID) + " " + str(counter))
            logitCorr = batchOrdered[0][-1]["relevant_logprob_sum"]
            pyro.sample("result_Correct_{}".format(q), Bernoulli(logits=logitCorr),
                        obs=Variable(torch.FloatTensor([1.0])))

adam_params = {"lr": 0.001, "betas": (0.90, 0.999)}
optimizer = Adam(adam_params)

# setup the inference algorithm
from pyro.infer import Trace_ELBO
svi = SVI(model, guide, optimizer, loss=Trace_ELBO())  # , num_particles=7)

n_steps = 400000
# do gradient steps
for step in range(1, n_steps):
    if step % 100 == 1:
        print("DOING A STEP")
        print(".......")
        print(step)
        # for name in pyro.get_param_store().get_all_param_names():
        #     print([name, pyro.param(name).data.numpy()])
    svi.step(corpus)
    if step % 2000 == 0:
        print("Saving")
    print('tensor_1')
    print(x_data_.shape)
    # run the nn forward on data
    prediction_mean = lifted_reg_model(x_data_).squeeze(-1)
    print('tensor_2')
    print(x_data_.shape)
    # condition on the observed data
    pyro.sample("obs", Normal(prediction_mean, scale), obs=y_data_)
    return prediction_mean

from pyro.contrib.autoguide import AutoDiagonalNormal
guide = AutoDiagonalNormal(model)

optim = Adam({"lr": 0.01})
svi = SVI(model, guide, optim, loss=Trace_ELBO(), num_samples=50000)
type(svi)
type(guide)

def train():
    pyro.clear_param_store()
    for j in range(num_iterations):
        # calculate the loss and take a gradient step
        loss = svi.step(x_data_, y_data_)
        if j % 100 == 0:
            print("[iteration %04d] loss: %.4f" % (j + 1, loss / len(x_data_)))

x_data.shape
    plt.show()


if __name__ == '__main__':
    # Set up environment
    dp_gt = dict(m=2., k=20., d=0.8)  # ground truth
    dp_init = dict(m=1.0, k=24., d=0.4)  # initial guess
    dt = 1/50.
    env = OneMassOscillatorSim(dt=dt, max_steps=400)
    env.reset(domain_param=dp_gt)

    # Set up policy
    policy = DummyPolicy(env.spec)

    # Sample
    sampler = ParallelSampler(env, policy, num_envs=1, min_rollouts=50, seed=1)
    ros = sampler.sample()

    # Pyro
    pyro.set_rng_seed(1001)
    pyro.enable_validation(True)

    train(
        SVI(model=model,
            guide=guide,
            optim=optim.Adam({'lr': 0.01}),
            # optim=optim.SGD({'lr': 0.001, 'momentum': 0.1}),
            loss=Trace_ELBO()),
        rollouts=ros,
        prior=dp_init
    )
def main(args):
    """
    run inference for SS-VAE

    :param args: arguments for SS-VAE
    :return: None
    """
    if args.seed is not None:
        pyro.set_rng_seed(args.seed)

    viz = None
    if args.visualize:
        viz = Visdom()
        mkdir_p("./vae_results")

    # batch_size: number of images (and labels) to be considered in a batch
    ss_vae = SSVAE(z_dim=args.z_dim,
                   hidden_layers=args.hidden_layers,
                   use_cuda=args.cuda,
                   config_enum=args.enum_discrete,
                   aux_loss_multiplier=args.aux_loss_multiplier)

    # setup the optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta_1, 0.999)}
    optimizer = Adam(adam_params)

    # set up the loss(es) for inference. wrapping the guide in config_enumerate builds the loss as a sum
    # by enumerating each class label for the sampled discrete categorical distribution in the model
    guide = config_enumerate(ss_vae.guide, args.enum_discrete, expand=True)
    elbo = (JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO)(max_plate_nesting=1)
    loss_basic = SVI(ss_vae.model, guide, optimizer, loss=elbo)

    # build a list of all losses considered
    losses = [loss_basic]

    # aux_loss: whether to use the auxiliary loss from NIPS 14 paper (Kingma et al)
    if args.aux_loss:
        elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
        loss_aux = SVI(ss_vae.model_classify, ss_vae.guide_classify, optimizer, loss=elbo)
        losses.append(loss_aux)

    try:
        # setup the logger if a filename is provided
        logger = open(args.logfile, "w") if args.logfile else None

        data_loaders = setup_data_loaders(MNISTCached, args.cuda, args.batch_size,
                                          sup_num=args.sup_num)

        # how often would a supervised batch be encountered during inference
        # e.g. if sup_num is 3000, we would have every 16th = int(50000/3000) batch supervised
        # until we have traversed through the all supervised batches
        periodic_interval_batches = int(MNISTCached.train_data_size / (1.0 * args.sup_num))

        # number of unsupervised examples
        unsup_num = MNISTCached.train_data_size - args.sup_num

        # initializing local variables to maintain the best validation accuracy
        # seen across epochs over the supervised training set
        # and the corresponding testing set and the state of the networks
        best_valid_acc, corresponding_test_acc = 0.0, 0.0

        # WL: added. =====
        print_and_log(logger, args)
        print_and_log(logger,
                      "\nepoch\t" + "elbo(sup)\t" + "elbo(unsup)\t" + "time(sec)")
        times = [time.time()]
        # ================

        # run inference for a certain number of epochs
        for i in range(0, args.num_epochs):
            # get the losses for an epoch
            epoch_losses_sup, epoch_losses_unsup = \
                run_inference_for_epoch(data_loaders, losses, periodic_interval_batches)

            # compute average epoch losses i.e. losses per example
            avg_epoch_losses_sup = map(lambda v: v / args.sup_num, epoch_losses_sup)
            avg_epoch_losses_unsup = map(lambda v: v / unsup_num, epoch_losses_unsup)

            # store the loss and validation/testing accuracies in the logfile
            # WL: edited. =====
            # str_loss_sup = " ".join(map(str, avg_epoch_losses_sup))
            # str_loss_unsup = " ".join(map(str, avg_epoch_losses_unsup))
            # str_print = "{} epoch: avg losses {}".format(i, "{} {}".format(str_loss_sup, str_loss_unsup))
            times.append(time.time())
            str_elbo_sup = " ".join(map(lambda v: f"{-v:.4f}", avg_epoch_losses_sup))
            str_elbo_unsup = " ".join(map(lambda v: f"{-v:.4f}", avg_epoch_losses_unsup))
            str_print = f"{i:06d}\t"\
                        f"{str_elbo_sup}\t"\
                        f"{str_elbo_unsup}\t"\
                        f"{times[-1]-times[-2]:.3f}"
            # =================

            validation_accuracy = get_accuracy(data_loaders["valid"], ss_vae.classifier,
                                               args.batch_size)
            # WL: commented. =====
            # str_print += " validation accuracy {}".format(validation_accuracy)
            # ====================

            # this test accuracy is only for logging, this is not used
            # to make any decisions during training
            test_accuracy = get_accuracy(data_loaders["test"], ss_vae.classifier,
                                         args.batch_size)
            # WL: commented. =====
            # str_print += " test accuracy {}".format(test_accuracy)
            # ====================

            # update the best validation accuracy and the corresponding
            # testing accuracy and the state of the parent module (including the networks)
            if best_valid_acc < validation_accuracy:
                best_valid_acc = validation_accuracy
                corresponding_test_acc = test_accuracy

            print_and_log(logger, str_print)

        final_test_accuracy = get_accuracy(data_loaders["test"], ss_vae.classifier,
                                           args.batch_size)
        # WL: commented. =====
        # print_and_log(logger, "best validation accuracy {} corresponding testing accuracy {} "
        #               "last testing accuracy {}".format(best_valid_acc, corresponding_test_acc,
        #                                                 final_test_accuracy))
        # ====================

        # visualize the conditional samples
        visualize(ss_vae, viz, data_loaders["test"])
    finally:
        # close the logger file object if we opened it earlier
        if args.logfile:
            logger.close()
    def run_guide(self):
        self.echo = True
        results = []
        for _ in range(20):
            global init_state
            init_state = reset_init_state()
            survive = guide()
            results.append(survive)
        self.echo = False


agent = AgentModel()
guide = agent.guide
model = agent.model

learning_rate = 2e-3  # 1e-5
optimizer = optim.Adam({"lr": learning_rate})
svi = SVI(model, guide, optimizer, loss=Trace_ELBO())


def optimize():
    loss = 0
    print("Optimizing...")
    for t in range(num_steps):
        global init_state
        init_state = reset_init_state()
        loss += svi.step()
        if (t % 100 == 0) and (t > 0):
            print("at {} step loss is {}".format(t, loss / t))


def train(epoch=2, batch_size=10):
    global start_time
    global total_duration
    for epoc in range(epoch):
class SVIExperiment(BaseCovariateExperiment):
    def __init__(self, hparams, pyro_model: BaseSEM):
        super().__init__(hparams, pyro_model)

        self.svi_loss = CustomELBO(num_particles=hparams.num_svi_particles)

        self._build_svi()

    def _build_svi(self, loss=None):
        def per_param_callable(module_name, param_name):
            params = {
                'eps': 1e-5,
                'amsgrad': self.hparams.use_amsgrad,
                'weight_decay': self.hparams.l2
            }
            if module_name == 'intensity_flow_components' or module_name == 'thickness_flow_components':
                params['lr'] = self.hparams.pgm_lr
                return params
            else:
                params['lr'] = self.hparams.lr
                return params

        if loss is None:
            loss = self.svi_loss

        if self.hparams.use_cf_guide:
            def guide(*args, **kwargs):
                return self.pyro_model.counterfactual_guide(
                    *args, **kwargs, counterfactual_type=self.hparams.cf_elbo_type)
            self.svi = SVI(self.pyro_model.svi_model, guide, Adam(per_param_callable), loss)
        else:
            self.svi = SVI(self.pyro_model.svi_model, self.pyro_model.svi_guide,
                           Adam(per_param_callable), loss)
        self.svi.loss_class = loss

    def backward(self, *args, **kwargs):
        pass  # No loss to backpropagate since we're using Pyro's optimisation machinery

    def print_trace_updates(self, batch):
        print('Traces:\n' + ('#' * 10))

        guide_trace = pyro.poutine.trace(self.pyro_model.svi_guide).get_trace(**batch)
        model_trace = pyro.poutine.trace(
            pyro.poutine.replay(self.pyro_model.svi_model, trace=guide_trace)).get_trace(**batch)

        guide_trace = pyro.poutine.util.prune_subsample_sites(guide_trace)
        model_trace = pyro.poutine.util.prune_subsample_sites(model_trace)

        model_trace.compute_log_prob()
        guide_trace.compute_score_parts()

        print(f'model: {model_trace.nodes.keys()}')
        for name, site in model_trace.nodes.items():
            if site["type"] == "sample":
                fn = site['fn']
                if isinstance(fn, Independent):
                    fn = fn.base_dist
                print(f'{name}: {fn} - {fn.support}')
                log_prob_sum = site["log_prob_sum"]
                is_obs = site["is_observed"]
                print(f'model - log p({name}) = {log_prob_sum} | obs={is_obs}')
                if torch.isnan(log_prob_sum):
                    value = site['value'][0]
                    conc0 = fn.concentration0
                    conc1 = fn.concentration1
                    print(f'got:\n{value}\n{conc0}\n{conc1}')
                    raise Exception()

        print(f'guide: {guide_trace.nodes.keys()}')
        for name, site in guide_trace.nodes.items():
            if site["type"] == "sample":
                fn = site['fn']
                if isinstance(fn, Independent):
                    fn = fn.base_dist
                print(f'{name}: {fn} - {fn.support}')
                entropy = site["score_parts"].entropy_term.sum()
                is_obs = site["is_observed"]
                print(f'guide - log q({name}) = {entropy} | obs={is_obs}')

    def get_trace_metrics(self, batch):
        metrics = {}

        model = self.svi.loss_class.trace_storage['model']
        guide = self.svi.loss_class.trace_storage['guide']

        metrics['log p(x)'] = model.nodes['x']['log_prob'].mean()
        metrics['log p(intensity)'] = model.nodes['intensity']['log_prob'].mean()
        metrics['log p(thickness)'] = model.nodes['thickness']['log_prob'].mean()
        metrics['p(z)'] = model.nodes['z']['log_prob'].mean()
        metrics['q(z)'] = guide.nodes['z']['log_prob'].mean()
        metrics['log p(z) - log q(z)'] = metrics['p(z)'] - metrics['q(z)']

        return metrics

    def prep_batch(self, batch):
        x = batch['image']
        thickness = batch['thickness'].unsqueeze(1).float()
        intensity = batch['intensity'].unsqueeze(1).float()

        x = x.float()

        if self.training:
            x += torch.rand_like(x)

        x = x.unsqueeze(1)

        return {'x': x, 'thickness': thickness, 'intensity': intensity}

    def training_step(self, batch, batch_idx):
        batch = self.prep_batch(batch)

        if self.hparams.validate:
            print('Validation:')
            self.print_trace_updates(batch)

        loss = self.svi.step(**batch)

        metrics = self.get_trace_metrics(batch)

        if np.isnan(loss):
            self.logger.experiment.add_text('nan', f'nan at {self.current_epoch}:\n{metrics}')
            raise ValueError('loss went to nan with metrics:\n{}'.format(metrics))

        tensorboard_logs = {('train/' + k): v for k, v in metrics.items()}
        tensorboard_logs['train/loss'] = loss

        return {'loss': torch.Tensor([loss]), 'log': tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        batch = self.prep_batch(batch)
        loss = self.svi.evaluate_loss(**batch)
        metrics = self.get_trace_metrics(batch)
        return {'loss': loss, **metrics}

    def test_step(self, batch, batch_idx):
        batch = self.prep_batch(batch)
        loss = self.svi.evaluate_loss(**batch)
        metrics = self.get_trace_metrics(batch)
        samples = self.build_test_samples(batch)
        return {'loss': loss, **metrics, 'samples': samples}

    @classmethod
    def add_arguments(cls, parser):
        parser = super().add_arguments(parser)
        parser.add_argument('--num_svi_particles', default=4, type=int,
                            help="number of particles to use for ELBO (default: %(default)s)")
        parser.add_argument('--num_sample_particles', default=32, type=int,
                            help="number of particles to use for MC sampling (default: %(default)s)")
        parser.add_argument('--use_cf_guide', default=False, action='store_true',
                            help="whether to use counterfactual guide (default: %(default)s)")
        parser.add_argument('--cf_elbo_type', default=-1, choices=[-1, 0, 1, 2],
                            help="-1: randomly select per batch, 0: shuffle thickness, "
                                 "1: shuffle intensity, 2: shuffle both (default: %(default)s)")
        return parser
    outw_sigma_param = softplus(pyro.param("outw_sigma", outw_sigma))
    outw_prior = Normal(loc=outw_mu_param, scale=outw_sigma_param).independent(1)

    # Output layer bias distribution priors
    outb_mu = torch.randn_like(net.out.bias)
    outb_sigma = torch.randn_like(net.out.bias)
    outb_mu_param = pyro.param("outb_mu", outb_mu)
    outb_sigma_param = softplus(pyro.param("outb_sigma", outb_sigma))
    outb_prior = Normal(loc=outb_mu_param, scale=outb_sigma_param)

    priors = {'fc1.weight': fc1w_prior, 'fc1.bias': fc1b_prior,
              'out.weight': outw_prior, 'out.bias': outb_prior}

    lifted_module = pyro.random_module("module", net, priors)
    return lifted_module()


optim = Adam({"lr": 0.01})
svi = SVI(model, guide, optim, loss=Trace_ELBO())

num_iterations = 5
loss = 0

for j in range(num_iterations):
    loss = 0
    for batch_id, data in enumerate(train_loader):
        # calculate the loss and take a gradient step
        loss += svi.step(data[0].view(-1, 28 * 28), data[1])
    normalizer_train = len(train_loader.dataset)
    total_epoch_loss_train = loss / normalizer_train
    print("Epoch ", j, " Loss ", total_epoch_loss_train)
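
# A hedged follow-up sketch (not part of the original script): with
# pyro.random_module, predictions are commonly made by drawing several sampled
# networks from the trained guide and averaging their outputs.
def predict(x, n_samples=10):
    # assumes the guide takes (x, y) like the model in the training loop above
    sampled_models = [guide(None, None) for _ in range(n_samples)]
    yhats = [m(x).data for m in sampled_models]
    return torch.stack(yhats).mean(0)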
        s0 = 'epoch {0}/{1}\n'.format(epoch, NUM_EPOCHS)
        s1, s2 = '', ''
        for k, v in train_props.items():
            s1 = s1 + 'train_' + k + ': ' + str(v) + ' '
        for k, v in cv_props.items():
            s2 = s2 + 'cv_' + k + ': ' + str(v) + ' '
        print(s0 + s1 + s2)


if __name__ == "__main__":
    try:
        pyro.clear_param_store()
        clf = Classifier()
        opt = ClippedAdam({"lr": 0.005, "clip_norm": 0.5})
        svi = SVI(model=clf.model, guide=clf.guide, optim=opt, loss=Trace_ELBO())
        train_loader = torch.load('train_loader.pt')
        cv_loader = torch.load('cv_loader.pt')
        epochs = NUM_EPOCHS
        num_iter = 5
        best_loss = 1e50
        for epoch in tqdm(range(epochs)):
            train_props = {k: 0 for k in status_properties}
            for i, data in enumerate(train_loader):
                clf.train()
                x, targets = data
                x = x.to(device)
                targets = targets.to(device)
                targets = targets.view(-1)
def initialize_optimizer(self, lr):
    optimizer = Adam({'lr': lr})
    elbo = JitTrace_ELBO() if self.args.jit else Trace_ELBO()
    return SVI(self.model, self.guide, optimizer, loss=elbo)
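
# A hedged usage sketch (the surrounding class and `batch` are placeholders,
# not shown here): the jit switch trades flexibility for speed, since
# JitTrace_ELBO compiles the model/guide on the first call and therefore
# assumes tensor shapes stay fixed across steps.
#
# svi = self.initialize_optimizer(lr=1e-3)
# for step in range(1000):
#     loss = svi.step(batch)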
def train(self, epochs, lr=3.0e-5, tf=2):
    """Train the DLGM for some number of epochs."""
    # Set up the optimizer.
    optimizer = Adam({"lr": lr})
    train_elbo = {}
    test_elbo = {}
    start_epoch = 0

    # Load cached state, if given.
    if self.load_dir is not None:
        filename = self.load_dir + 'checkpoint.tar'
        checkpoint = torch.load(filename)
        self.encoder.load_state_dict(checkpoint['encoder_state_dict'])
        self.decoder.load_state_dict(checkpoint['decoder_state_dict'])
        optimizer.set_state(checkpoint['optimizer_state'])
        train_elbo = checkpoint['train_elbo']
        test_elbo = checkpoint['test_elbo']
        start_epoch = checkpoint['epoch'] + 1
        self.partition = checkpoint['partition']
        self.train_loader, self.test_loader = get_data_loaders(self.partition, self.p)

    # Set up the inference algorithm.
    elbo = Trace_ELBO()
    svi = SVI(self.model, self.guide, optimizer, loss=elbo)
    print("dataset length: ", len(self.train_loader.dataset))

    for epoch in range(start_epoch, start_epoch + epochs + 1, 1):
        train_loss = 0.0
        # Iterate over the training data.
        for i, temp in enumerate(self.train_loader):
            x = temp['spec'].cuda().view(-1, self.input_dim)
            train_loss += svi.step(x)

        # Report training diagnostics.
        normalizer_train = len(self.train_loader.dataset)
        total_epoch_loss_train = train_loss / normalizer_train
        train_elbo[epoch] = total_epoch_loss_train
        print("[epoch %03d] average train loss: %.4f" % (epoch, total_epoch_loss_train))

        if (epoch + 1) % tf == 0:
            test_loss = 0.0
            # Iterate over the test set.
            for i, temp in enumerate(self.test_loader):
                x = temp['spec'].cuda().view(-1, self.input_dim)
                test_loss += svi.evaluate_loss(x)

            # Report test diagnostics.
            normalizer_test = len(self.test_loader.dataset)
            total_epoch_loss_test = test_loss / normalizer_test
            test_elbo[epoch] = total_epoch_loss_test
            print("[epoch %03d] average test loss: %.4f" % (epoch, total_epoch_loss_test))
            self.visualize()

            if self.save_dir is not None:
                filename = self.save_dir + 'checkpoint.tar'
                state = {
                    'train_elbo': train_elbo,
                    'test_elbo': test_elbo,
                    'epoch': epoch,
                    'encoder_state_dict': self.encoder.state_dict(),
                    'decoder_state_dict': self.decoder.state_dict(),
                    'optimizer_state': optimizer.get_state(),
                    'partition': self.partition,
                }
                torch.save(state, filename)
    for card in game.players[idx_to_id[i]].hand:
        card_holders[card] = i
print(card_holders)

for t in range(20):
    real_action = max(game.players[game.turn].action_probs(), key=itemgetter(1))[0]
    print(game.turn, real_action)
    if game.turn != my_id:
        # setup the optimizer
        adam_params = {"lr": 0.0001, "betas": (0.90, 0.999)}
        optimizer = Adam(adam_params)

        # setup the inference algorithm
        svi = SVI(model, guide, optimizer, loss=Trace_ELBO())

        n_steps = 1000
        # do gradient steps
        for step in range(n_steps):
            svi.step(game, my_id, real_action)
            if step % 100 == 0:
                print("Step: {}".format(step))

        print(game.turn)
        for card in game.players[game.turn].hand:
            print(card)

        for card in get_unknown_cards(game, my_id):
            if real_action and card in real_action.cards:
                continue
            # grab the learned variational parameters
def guide(x, y):
    dists = {}
    for name, par in model.named_parameters():
        loc = pyro.param(name + '.loc', torch.randn(*par.shape))
        scale = softplus(pyro.param(name + '.scale',
                                    -3.0 * torch.ones(*par.shape) + 0.05 * torch.randn(*par.shape)))
        dists[name] = dist.Normal(loc, scale).independent(par.dim())
    bayesian_model = pyro.random_module('bayesian_model', model, dists)
    return bayesian_model()


# optim = Adam({"lr": 0.05})
# svi = SVI(pyromodel, guide, optim, loss=Trace_ELBO())
AdamArgs = {'lr': 0.05}
optimizer = torch.optim.Adam
scheduler = pyro.optim.ExponentialLR({'optimizer': optimizer,
                                      'optim_args': AdamArgs,
                                      'gamma': 0.99995})
svi = SVI(pyromodel, guide, scheduler, loss=Trace_ELBO(), num_samples=EPOCHS)

loss_hist = []
pyro.clear_param_store()
for j in range(EPOCHS):
    loss = svi.step(torch.tensor(x), torch.tensor(y))
    if j % 100 == 0:
        # print("[iteration %04d] loss: %.4f" % (j + 1, loss / float(N)))
        loss_hist.append(np.mean(loss))
        print(f"epoch {j}/{EPOCHS} :", loss_hist[-1])

plt.figure()
plt.plot(loss_hist)
plt.yscale('log')
plt.title("ELBO")
def training(args, rel_embeddings, word_embeddings):
    if args.seed is not None:
        pyro.set_rng_seed(args.seed)

    # CUDA for PyTorch
    cuda_available = torch.cuda.is_available()
    if (cuda_available and args.cuda):
        device = torch.device("cuda")
        torch.cuda.set_device(0)
        print("using gpu acceleration")

    print("Generating Config")
    config = Config(
        word_embeddings=torch.FloatTensor(word_embeddings),
        decoder_hidden_dim=args.decoder_hidden_dim,
        num_relations=7,
        encoder_hidden_dim=args.encoder_hidden_dim,
        num_predicates=1000,
        batch_size=args.batch_size
    )

    # initialize the generator model
    generator = SimpleGenerator(config)

    # setup the optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta_1, 0.999)}
    optimizer = ClippedAdam(adam_params)

    # set up the loss(es) for inference. wrapping the guide in config_enumerate builds the loss as a sum
    # by enumerating each class label for the sampled discrete categorical distribution in the model
    if args.enumerate:
        guide = config_enumerate(generator.guide, args.enum_discrete, expand=True)
    else:
        guide = generator.guide
    elbo = (JitTraceEnum_ELBO if args.jit else TraceEnum_ELBO)(max_plate_nesting=1)
    loss_basic = SVI(generator.model, guide, optimizer, loss=elbo)

    # build a list of all losses considered
    losses = [loss_basic]

    # aux_loss: whether to use the auxiliary loss from NIPS 14 paper (Kingma et al)
    if args.aux_loss:
        elbo = JitTrace_ELBO() if args.jit else Trace_ELBO()
        loss_aux = SVI(generator.model_identify, generator.guide_identify, optimizer, loss=elbo)
        losses.append(loss_aux)

    # prepare data
    real_model = RealModel(rel_embeddings, word_embeddings)
    data = real_model.generate_data()
    sup_train_set = data[:100]
    unsup_train_set = data[100:700]
    eval_set = data[700:900]
    test_set = data[900:]
    data_loaders = setup_data_loaders(sup_train_set, unsup_train_set, eval_set, test_set,
                                      batch_size=args.batch_size)
    num_train = len(sup_train_set) + len(unsup_train_set)
    num_eval = len(eval_set)
    num_test = len(test_set)

    # how often would a supervised batch be encountered during inference
    # e.g. if sup_num is 3000, we would have every 16th = int(50000/3000) batch supervised
    # until we have traversed through the all supervised batches
    periodic_interval_batches = int(1.0 * num_train / len(sup_train_set))

    # setup the logger if a filename is provided
    log_fn = "./logs/" + args.experiment_type + '/' + args.experiment_name + '.log'
    logger = open(log_fn, "w")

    # run inference for a certain number of epochs
    for i in tqdm(range(0, args.num_epochs)):
        # get the losses for an epoch
        epoch_losses_sup, epoch_losses_unsup = \
            train_epoch(data_loaders=data_loaders, models=losses,
                        periodic_interval_batches=periodic_interval_batches)

        # compute average epoch losses i.e. losses per example
        avg_epoch_losses_sup = map(lambda v: v / len(sup_train_set), epoch_losses_sup)
        avg_epoch_losses_unsup = map(lambda v: v / len(unsup_train_set), epoch_losses_unsup)

        # store the loss and validation/testing accuracies in the logfile
        str_loss_sup = " ".join(map(str, avg_epoch_losses_sup))
        str_loss_unsup = " ".join(map(str, avg_epoch_losses_unsup))
        str_print = "{} epoch: avg losses {}".format(i, "{} {}".format(str_loss_sup, str_loss_unsup))
        print_and_log(logger, str_print)

    # save trained models
    torch.save(generator.state_dict(), './models/test_generator_state_dict.pth')
    return generator
def main(args): # Init tensorboard writer = SummaryWriter('./runs/' + args.runname + str(args.trialnumber)) model_name = 'VanillaDMMClimate' training_log_loc = Path( './logs/{}/training.log'.format(model_name.lower())) # Set evaluation log file evaluation_logpath = './logs/{}/evaluation_climate_result.log'.format( model_name.lower()) log_evaluation(evaluation_logpath, 'Evaluation Trial - {}\n'.format(args.trialnumber)) # Constants time_length = 30 input_length_for_pred = 20 pred_length = time_length - input_length_for_pred validation_pred_lengths = [5, 10, 15, 20] train_batch_size = 16 valid_batch_size = 1 training_size_ratio = 0.7 data_min_val, data_max_val = 0, 80 # For model input_channels = 1 z_channels = 50 emission_channels = [64, 32] transition_channels = 64 encoder_channels = [32, 64] rnn_input_dim = 256 rnn_channels = 128 kernel_size = 3 pred_length = 10 # Device checking use_cuda = torch.cuda.is_available() device = torch.device("cuda:0" if use_cuda else "cpu") # Make dataset logging.info("Generate data") temp_train_data, temp_valid_data = get_netcdf_data( args.datapath, training_size_ratio, time_length) logging.info("Train data shape: {}".format(temp_train_data.shape)) logging.info("Valid data shape: {}".format(temp_valid_data.shape)) train_dataset = CMAPDataset(torch.Tensor(temp_train_data)) valid_dataset = CMAPDataset(torch.Tensor(temp_valid_data)) # Create data loaders from pickle data logging.info("Generate data loaders") train_dataloader = DataLoader( train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8) valid_dataloader = DataLoader( valid_dataset, batch_size=valid_batch_size, num_workers=4) # Training parameters width = temp_train_data.shape[3] height = temp_train_data.shape[2] input_dim = width * height # Create model logging.warning("Generate model") logging.warning("Height and width: {}, {}".format(height, width)) logging.warning(input_dim) pred_input_dim = 10 dmm = DMM(input_channels=input_channels, z_channels=z_channels, emission_channels=emission_channels, transition_channels=transition_channels, encoder_channels=encoder_channels, rnn_input_dim=rnn_input_dim, rnn_channels=rnn_channels, kernel_size=kernel_size, height=height, width=width, pred_input_dim=pred_input_dim, num_layers=1, rnn_dropout_rate=0.0, num_iafs=0, iaf_dim=50, use_cuda=use_cuda) # Initialize model logging.info("Initialize model") epochs = args.endepoch learning_rate = 0.0001 beta1 = 0.9 beta2 = 0.999 clip_norm = 10.0 lr_decay = 1.0 weight_decay = 0 adam_params = {"lr": learning_rate, "betas": (beta1, beta2), "clip_norm": clip_norm, "lrd": lr_decay, "weight_decay": weight_decay} adam = ClippedAdam(adam_params) elbo = Trace_ELBO() svi = SVI(dmm.model, dmm.guide, adam, loss=elbo) # saves the model and optimizer states to disk save_model = Path('./checkpoints/' + model_name) def save_checkpoint(epoch): save_dir = save_model / '{}.model'.format(epoch) save_opt_dir = save_model / '{}.opt'.format(epoch) logging.info("saving model to %s..." % save_dir) torch.save(dmm.state_dict(), save_dir) logging.info("saving optimizer states to %s..." 
% save_opt_dir) adam.save(save_opt_dir) logging.info("done saving model and optimizer checkpoints to disk.") # Staring epoch start_epoch = args.startepoch # loads the model and optimizer states from disk if start_epoch != 0: load_opt = './checkpoints/' + model_name + \ '/e{}-i119-tn{}-opt.opt'.format(start_epoch - 1, args.trialnumber) load_model = './checkpoints/' + model_name + \ '/e{}-i119-tn{}.pt'.format(start_epoch - 1, args.trialnumber) def load_checkpoint(): # assert exists(load_opt) and exists(load_model), \ # "--load-model and/or --load-opt misspecified" logging.info("loading model from %s..." % load_model) dmm.load_state_dict(torch.load(load_model, map_location=device)) # logging.info("loading optimizer states from %s..." % load_opt) # adam.load(load_opt) # logging.info("done loading model and optimizer states.") if load_model != '': logging.info('Load checkpoint') load_checkpoint() # Validation only? validation_only = args.validonly # Train the model if not validation_only: logging.info("Training model") annealing_epochs = 1000 minimum_annealing_factor = 0.2 N_train_size = temp_train_data.shape[0] N_mini_batches = int(N_train_size / train_batch_size + int(N_train_size % train_batch_size > 0)) for epoch in tqdm(range(start_epoch, epochs), desc='Epoch', leave=True): r_loss_train = 0 dmm.train(True) idx = 0 mov_avg_loss = 0 mov_data_len = 0 for which_mini_batch, data in enumerate(tqdm(train_dataloader, desc='Train', leave=True)): if annealing_epochs > 0 and epoch < annealing_epochs: # compute the KL annealing factor approriate for the current mini-batch in the current epoch min_af = minimum_annealing_factor annealing_factor = min_af + (1.0 - min_af) * \ (float(which_mini_batch + epoch * N_mini_batches + 1) / float(annealing_epochs * N_mini_batches)) else: # by default the KL annealing factor is unity annealing_factor = 1.0 data['observation'] = normalize( data['observation'].unsqueeze(2).to(device), data_min_val, data_max_val) batch_size, length, _, w, h = data['observation'].shape data_reversed = reverse_sequences(data['observation']) data_mask = torch.ones( batch_size, length, input_channels, w, h).cuda() loss = svi.step(data['observation'], data_reversed, data_mask, annealing_factor) # Running losses mov_avg_loss += loss mov_data_len += batch_size r_loss_train += loss idx += 1 # Average losses train_loss_avg = r_loss_train / (len(train_dataset) * time_length) writer.add_scalar('Loss/train', train_loss_avg, epoch) logging.info("Epoch: %d, Training loss: %1.5f", epoch, train_loss_avg) # # Time to time evaluation if epoch == epochs - 1: for temp_pred_length in validation_pred_lengths: r_loss_valid = 0 r_loss_loc_valid = 0 r_loss_scale_valid = 0 dmm.train(False) val_pred_length = temp_pred_length val_pred_input_length = 10 with torch.no_grad(): for i, data in enumerate(tqdm(valid_dataloader, desc='Eval', leave=True)): data['observation'] = normalize( data['observation'].unsqueeze(2).to(device), data_min_val, data_max_val) batch_size, length, _, w, h = data['observation'].shape data_reversed = reverse_sequences( data['observation']) data_mask = torch.ones( batch_size, length, input_channels, w, h).cuda() pred_tensor = data['observation'][:, :input_length_for_pred, :, :, :] pred_tensor_reversed = reverse_sequences( pred_tensor) pred_tensor_mask = torch.ones( batch_size, input_length_for_pred, input_channels, w, h).cuda() ground_truth = data['observation'][:, input_length_for_pred:, :, :, :] val_nll = svi.evaluate_loss( data['observation'], data_reversed, data_mask) # _, _, loss_loc, 
loss_scale = do_prediction( # dmm, pred_tensor, pred_tensor_reversed, pred_tensor_mask, val_pred_length, ground_truth) preds, _, loss_loc, loss_scale = do_prediction_rep_inference( dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation']) ground_truth = denormalize( data['observation'].squeeze().cpu().detach(), data_min_val, data_max_val ) pred_with_input = denormalize( torch.cat( [data['observation'][:, :-val_pred_length, :, :, :].squeeze(), preds.squeeze()], dim=0 ).cpu().detach(), data_min_val, data_max_val ) # Running losses r_loss_valid += val_nll r_loss_loc_valid += loss_loc r_loss_scale_valid += loss_scale # Average losses valid_loss_avg = r_loss_valid / \ (len(valid_dataset) * time_length) valid_loss_loc_avg = r_loss_loc_valid / \ (len(valid_dataset) * val_pred_length * width * height) valid_loss_scale_avg = r_loss_scale_valid / \ (len(valid_dataset) * val_pred_length * width * height) writer.add_scalar('Loss/test', valid_loss_avg, epoch) writer.add_scalar( 'Loss/test_obs', valid_loss_loc_avg, epoch) writer.add_scalar('Loss/test_scale', valid_loss_scale_avg, epoch) logging.info("Validation loss: %1.5f", valid_loss_avg) logging.info("Validation obs loss: %1.5f", valid_loss_loc_avg) logging.info("Validation scale loss: %1.5f", valid_loss_scale_avg) log_evaluation(evaluation_logpath, "Validation obs loss for {}s pred {}: {}\n".format( val_pred_length, args.trialnumber, valid_loss_loc_avg)) log_evaluation(evaluation_logpath, "Validation scale loss for {}s pred {}: {}\n".format( val_pred_length, args.trialnumber, valid_loss_scale_avg)) # Save model torch.save(dmm.state_dict(), args.modelsavepath / model_name / 'e{}-i{}-tn{}.pt'.format(epoch, idx, args.trialnumber)) adam.save(args.modelsavepath / model_name / 'e{}-i{}-tn{}-opt.opt'.format(epoch, idx, args.trialnumber)) # Last validation after training test_samples_indices = range(temp_valid_data.shape[0]) total_n = 0 if validation_only: r_loss_loc_valid = 0 r_loss_scale_valid = 0 r_loss_latent_valid = 0 dmm.train(False) val_pred_length = args.validpredlength val_pred_input_length = 10 with torch.no_grad(): for i in tqdm(test_samples_indices, desc='Valid', leave=True): # Data processing data = valid_dataset[i] if torch.isnan(torch.sum(data['observation'])): print("Skip {}".format(i)) continue else: total_n += 1 data['observation'] = normalize( data['observation'].unsqueeze(0).unsqueeze(2).to(device), data_min_val, data_max_val) batch_size, length, _, w, h = data['observation'].shape data_reversed = reverse_sequences(data['observation']) data_mask = torch.ones( batch_size, length, input_channels, w, h).to(device) # Prediction pred_tensor_mask = torch.ones( batch_size, input_length_for_pred, input_channels, w, h).to(device) preds, _, loss_loc, loss_scale = do_prediction_rep_inference( dmm, pred_tensor_mask, val_pred_length, val_pred_input_length, data['observation']) ground_truth = denormalize( data['observation'].squeeze().cpu().detach(), data_min_val, data_max_val ) pred_with_input = denormalize( torch.cat( [data['observation'][:, :-val_pred_length, :, :, :].squeeze(), preds.squeeze()], dim=0 ).cpu().detach(), data_min_val, data_max_val ) # Save samples if i < 5: save_dir_samples = Path( './samples/climate/{}s'.format(val_pred_length)) with open(save_dir_samples / '{}-gt.pkl'.format(i), 'wb') as fout: pickle.dump(ground_truth, fout) with open(save_dir_samples / '{}-vanilladmm-pred.pkl'.format(i), 'wb') as fout: pickle.dump(pred_with_input, fout) # Running losses r_loss_loc_valid += loss_loc r_loss_scale_valid += 
loss_scale
            r_loss_latent_valid += np.sum(
                (preds.squeeze().detach().cpu().numpy()
                 - data['latent'][time_length - val_pred_length:, :, :].detach().cpu().numpy()) ** 2)

    # Average losses
    logging.info("Validated %d samples in total.", total_n)
    valid_loss_loc_avg = r_loss_loc_valid / \
        (total_n * val_pred_length * width * height)
    valid_loss_scale_avg = r_loss_scale_valid / \
        (total_n * val_pred_length * width * height)
    valid_loss_latent_avg = r_loss_latent_valid / \
        (total_n * val_pred_length * width * height)
    logging.info("Validation obs loss for %ds pred VanillaDMM: %f",
                 val_pred_length, valid_loss_loc_avg)
    logging.info("Validation latent loss: %f", valid_loss_latent_avg)
    with open('VanillaDMMClimateResult.log', 'a+') as fout:
        validation_log = 'Pred {}s VanillaDMM: {}\n'.format(
            val_pred_length, valid_loss_loc_avg)
        fout.write(validation_log)
def inference(Model, training_data, test_data = None, config = None): ''' A wrapper function calling Pyro's SVI step with settings given in config. Records telemetry including elbo loss, mean negative log likelihood on held out data, gradient norms and parameter history during training. If config includes telemetry from a previous inference run, inference continues from that run. If slope_significance is set to a value less than 1, training halts when the mean negative log likelihood converges. Convergence is estimated by linear regression in a moving window of size convergence_window when p(slope = estimate|true_slope = 0) < slope_significance. Default config is config = dict( n_iter = 1000, learning_rate = 0.1, beta1 = 0.9, beta2 = 0.999, learning_rate_decay = 1., # no decay by default batch_size = 32, n_elbo_particles = 32, n_posterior_samples = 1024, window = 500, convergence_window = 30, slope_significance = 0.1, track_params = False, monitor_gradients = False, telemetry = None ) Example: ''' #initcopy = clone_init(init) if config is None: config = dict( n_iter = 1000, learning_rate = 0.1, beta1 = 0.9, beta2 = 0.999, learning_rate_decay = 1., # no decay by default batch_size = 32, n_elbo_particles = 32, n_posterior_samples = 1024, window = 500, convergence_window = 30, slope_significance = 0.1, track_params = False, monitor_gradients = False, telemetry = None, ) if test_data is None: training_data, test_data = train_test_split(training_data) #def per_param_callable(module_name, param_name): # return {"lr": config['learning_rate'], "betas": (0.90, 0.999)} # from http://pyro.ai/examples/svi_part_i.html model = Model.model guide = Model.guide optim = pyro.optim.Adam({"lr": config['learning_rate'], "betas": (config['beta1'], config['beta2'])}) # if there is previous telemetry in the config from an interrupted inference run # restore the state of that inference and continue training if config['telemetry']: pyro.clear_param_store() print('Continuing from previous inference run.') telemetry = config['telemetry'] optim.set_state(telemetry['optimizer_state']) pyro.get_param_store().set_state(telemetry['param_store_state']) i = len(telemetry['loss']) config['n_iter'] += i # init params not in telemetry model(training_data) guide(training_data) for k,v in pyro.get_param_store().items(): if k not in telemetry['param_history'].keys(): telemetry['param_history'][k] = v.unsqueeze(0) else: pyro.clear_param_store() telemetry = dict() telemetry['gradient_norms'] = defaultdict(list) telemetry['loss'] = [] telemetry['MNLL'] = [] telemetry['training_duration'] = 0 # call model and guide to populate param store #model(training_data, config['batch_size'], init) #guide(training_data, config['batch_size'], init) Model.batch_size = config['batch_size'] model(training_data) guide(training_data) # record init in param_history telemetry['param_history'] = dict({k:v.unsqueeze(0) for k,v in pyro.get_param_store().items()}) # record MNLL at init i = 0 with torch.no_grad(): #mnll = compute_mnll(model, guide, test_data, n_samples=config['n_posterior_samples']) telemetry['MNLL'].append(-Model.mnll(test_data, config['n_posterior_samples'])) print('\n') print("NLL after {}/{} iterations is {}".format(i,config['n_iter'], telemetry['MNLL'][-1])) # Learning rate schedulers # Haven't found a way to get and set its state for checkpointing #optim = torch.optim.Adam #scheduler = pyro.optim.ExponentialLR({'optimizer': optim, 'optim_args': {"lr": config['learning_rate'], "betas": (beta1, beta2)}, 'gamma': 
config['learning_rate_decay']})
    #scheduler = pyro.optim.ExponentialLR({'optimizer': optim, 'optim_args': per_param_callable, 'gamma': config['learning_rate_decay']})
    max_plate_nesting = _guess_max_plate_nesting(model, (training_data,), {})
    #print("Guessed that model has max {} nested plates.".format(max_plate_nesting))
    # look for sample sites with infer:enumerate
    trace = pyro.poutine.trace(model).get_trace(training_data)
    contains_enumeration = any(values['infer'] == {'enumerate': 'parallel'}
                               for node, values in trace.nodes.items() if 'infer' in values)
    if contains_enumeration:
        elbo = TraceEnum_ELBO(max_plate_nesting=max_plate_nesting,
                              num_particles=config['n_elbo_particles'],
                              vectorize_particles=True)
    else:
        elbo = Trace_ELBO(max_plate_nesting=max_plate_nesting,
                          num_particles=config['n_elbo_particles'],
                          vectorize_particles=True)
    #svi = SVI(model, guide, scheduler, loss=elbo)
    svi = SVI(model, guide, optim, loss=elbo)
    if config['monitor_gradients']:
        # register gradient hooks for monitoring
        for name, value in pyro.get_param_store().named_parameters():
            value.register_hook(lambda g, name=name: telemetry['gradient_norms'][name].append(g.norm().item()))
    start = time.time()
    while p_value_of_slope(telemetry['MNLL'], config['convergence_window'], config['slope_significance']) < config['slope_significance'] and i < config['n_iter']:
        try:
            loss = svi.step(training_data)
            telemetry['loss'].append(loss)
            if i % config['window'] or i <= config['window']:
                # print a progress dot except on periodic logging iterations
                print('.', end='')
                #scheduler.step()
            else:
                with torch.no_grad():
                    telemetry['MNLL'].append(-Model.mnll(test_data, config['n_posterior_samples']))
                    print('\n')
                    print("NLL after {}/{} iterations is {}".format(i, config['n_iter'], telemetry['MNLL'][-1]))
                print('\n')
                if config['track_params']:
                    telemetry['param_history'] = {k: torch.cat([telemetry['param_history'][k], v.unsqueeze(0).detach()], dim=0)
                                                  for k, v in pyro.get_param_store().items()}
            i += 1
        except KeyboardInterrupt:
            print('\nInterrupted by user after {} iterations.\n'.format(i))
            params = {k: v.detach() for k, v in pyro.get_param_store().items()}
            Model.params = params
            telemetry['training_duration'] += round(time.time() - start)
            telemetry['optimizer_state'] = optim.get_state()
            telemetry['param_store_state'] = pyro.get_param_store().get_state()
            return telemetry
    print('\nConverged in {} iterations.\n'.format(i))
    # make all pytorch tensors into np arrays, which consume less disk space
    #param_history = dict(zip(param_history.keys(), map(lambda x: x.detach().numpy(), param_history.values())))
    params = {k: v.detach() for k, v in pyro.get_param_store().items()}
    Model.params = params
    telemetry['training_duration'] += round(time.time() - start)
    telemetry['optimizer_state'] = optim.get_state()
    telemetry['param_store_state'] = pyro.get_param_store().get_state()
    return telemetry
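# Usage sketch for inference(), filling in the empty "Example:" in the docstring
# above. `MyModel` is a hypothetical wrapper exposing .model, .guide,
# .mnll(test_data, n_samples) and a settable .batch_size, as the function above
# assumes; it is not a class defined in this document.
#
#     model = MyModel()
#     telemetry = inference(model, training_data)
#     # resume an interrupted run by feeding the telemetry back in:
#     cfg = dict(n_iter=1000, learning_rate=0.1, beta1=0.9, beta2=0.999,
#                learning_rate_decay=1., batch_size=32, n_elbo_particles=32,
#                n_posterior_samples=1024, window=500, convergence_window=30,
#                slope_significance=0.1, track_params=False,
#                monitor_gradients=False, telemetry=telemetry)
#     telemetry = inference(model, training_data, config=cfg)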
def main(args): # clear param store pyro.clear_param_store() # setup MNIST data loaders # train_loader, test_loader train_loader, test_loader = setup_data_loaders(MNIST, use_cuda=args.cuda, batch_size=256) # setup the VAE vae = VAE(use_cuda=args.cuda) # setup the optimizer adam_args = {"lr": args.learning_rate} optimizer = Adam(adam_args) # setup the inference algorithm elbo = JitTrace_ELBO() if args.jit else Trace_ELBO() svi = SVI(vae.model, vae.guide, optimizer, loss=elbo) # setup visdom for visualization if args.visdom_flag: vis = visdom.Visdom() train_elbo = [] test_elbo = [] # training loop for epoch in range(args.num_epochs): # initialize loss accumulator epoch_loss = 0. # do a training epoch over each mini-batch x returned # by the data loader for x, _ in train_loader: # if on GPU put mini-batch into CUDA memory if args.cuda: x = x.cuda() # do ELBO gradient and accumulate loss epoch_loss += svi.step(x) # report training diagnostics normalizer_train = len(train_loader.dataset) total_epoch_loss_train = epoch_loss / normalizer_train train_elbo.append(total_epoch_loss_train) print("[epoch %03d] average training loss: %.4f" % (epoch, total_epoch_loss_train)) if epoch % args.test_frequency == 0: # initialize loss accumulator test_loss = 0. # compute the loss over the entire test set for i, (x, _) in enumerate(test_loader): # if on GPU put mini-batch into CUDA memory if args.cuda: x = x.cuda() # compute ELBO estimate and accumulate loss test_loss += svi.evaluate_loss(x) # pick three random test images from the first mini-batch and # visualize how well we're reconstructing them if i == 0: if args.visdom_flag: plot_vae_samples(vae, vis) reco_indices = np.random.randint(0, x.shape[0], 3) for index in reco_indices: test_img = x[index, :] reco_img = vae.reconstruct_img(test_img) vis.image(test_img.reshape(28, 28).detach().cpu().numpy(), opts={'caption': 'test image'}) vis.image(reco_img.reshape(28, 28).detach().cpu().numpy(), opts={'caption': 'reconstructed image'}) # report test diagnostics normalizer_test = len(test_loader.dataset) total_epoch_loss_test = test_loss / normalizer_test test_elbo.append(total_epoch_loss_test) print("[epoch %03d] average test loss: %.4f" % (epoch, total_epoch_loss_test)) if epoch == args.tsne_iter: mnist_test_tsne(vae=vae, test_loader=test_loader) plot_llk(np.array(train_elbo), np.array(test_elbo)) return vae
def main(**kwargs):
    args = argparse.Namespace(**kwargs)

    if 'save' in args:
        if os.path.exists(args.save):
            raise RuntimeError('Output file "{}" already exists.'.format(args.save))

    if args.seed is not None:
        pyro.set_rng_seed(args.seed)

    X, true_counts = load_data()
    X_size = X.size(0)
    if args.cuda:
        X = X.cuda()

    # Build a function to compute z_pres prior probabilities.
    if args.z_pres_prior_raw:
        def base_z_pres_prior_p(t):
            return args.z_pres_prior
    else:
        base_z_pres_prior_p = make_prior(args.z_pres_prior)

    # Wrap with logic to apply any annealing.
    def z_pres_prior_p(opt_step, time_step):
        p = base_z_pres_prior_p(time_step)
        if args.anneal_prior == 'none':
            return p
        else:
            decay = dict(lin=lin_decay, exp=exp_decay)[args.anneal_prior]
            return decay(p, args.anneal_prior_to, args.anneal_prior_begin,
                         args.anneal_prior_duration, opt_step)

    model_arg_keys = ['window_size', 'rnn_hidden_size', 'decoder_output_bias',
                      'decoder_output_use_sigmoid', 'baseline_scalar', 'encoder_net',
                      'decoder_net', 'predict_net', 'embed_net', 'bl_predict_net',
                      'non_linearity', 'pos_prior_mean', 'pos_prior_sd',
                      'scale_prior_mean', 'scale_prior_sd']
    model_args = {key: getattr(args, key) for key in model_arg_keys if key in args}
    air = AIR(
        num_steps=args.model_steps,
        x_size=50,
        use_masking=not args.no_masking,
        use_baselines=not args.no_baselines,
        z_what_size=args.encoder_latent_size,
        use_cuda=args.cuda,
        **model_args
    )

    if args.verbose:
        print(air)
        print(args)

    if 'load' in args:
        print('Loading parameters...')
        air.load_state_dict(torch.load(args.load))

    vis = visdom.Visdom(env=args.visdom_env)
    # Viz sample from prior.
    if args.viz:
        z, x = air.prior(5, z_pres_prior_p=partial(z_pres_prior_p, 0))
        vis.images(draw_many(x, tensor_to_objs(latents_to_tensor(z))))

    def per_param_optim_args(module_name, param_name):
        lr = args.baseline_learning_rate if 'bl_' in param_name else args.learning_rate
        return {'lr': lr}

    svi = SVI(air.model, air.guide,
              optim.Adam(per_param_optim_args),
              loss=TraceGraph_ELBO())

    # Do inference.
    t0 = time.time()
    examples_to_viz = X[5:10]

    for i in range(1, args.num_steps + 1):
        loss = svi.step(X, args.batch_size, z_pres_prior_p=partial(z_pres_prior_p, i))

        if args.progress_every > 0 and i % args.progress_every == 0:
            print('i={}, epochs={:.2f}, elapsed={:.2f}, elbo={:.2f}'.format(
                i,
                (i * args.batch_size) / X_size,
                (time.time() - t0) / 3600,
                loss / X_size))

        if args.viz and i % args.viz_every == 0:
            trace = poutine.trace(air.guide).get_trace(examples_to_viz, None)
            z, recons = poutine.replay(air.prior, trace=trace)(examples_to_viz.size(0))
            z_wheres = tensor_to_objs(latents_to_tensor(z))

            # Show data with inferred object positions.
            vis.images(draw_many(examples_to_viz, z_wheres))
            # Show reconstructions of data.
            vis.images(draw_many(recons, z_wheres))

        if args.eval_every > 0 and i % args.eval_every == 0:
            # Measure accuracy on subset of training data.
            acc, counts, error_z, error_ix = count_accuracy(X, true_counts, air, 1000)
            print('i={}, accuracy={}, counts={}'.format(i, acc, counts.numpy().tolist()))
            if args.viz and error_ix.size(0) > 0:
                vis.images(draw_many(X[error_ix[0:5]], tensor_to_objs(error_z[0:5])),
                           opts=dict(caption='errors ({})'.format(i)))

        if 'save' in args and i % args.save_every == 0:
            print('Saving parameters...')
            torch.save(air.state_dict(), args.save)
##################
## Do inference ##
##################

# Load data
train_loader, test_loader = setup_data_loaders(batch_size=256)

# clear param store
pyro.clear_param_store()

# setup the Factor Analysis model
fa = FA()

# setup the inference algorithm
svi = SVI(fa.model, fa.guide, optim=Adam({"lr": LEARNING_RATE}), loss=Trace_ELBO())

# training loop
train_elbo = []
test_elbo = []
for epoch in range(NUM_EPOCHS):
    total_epoch_loss_train = train(svi, train_loader)
    train_elbo.append(-total_epoch_loss_train)
    print("[epoch %03d] average training loss: %.4f" % (epoch, total_epoch_loss_train))

    if epoch % TEST_FREQUENCY == 0:
        total_epoch_loss_test = evaluate(svi, test_loader)
        test_elbo.append(-total_epoch_loss_test)
        print("[epoch %03d] average test loss: %.4f" % (epoch, total_epoch_loss_test))
def main(args):
    pyro.set_rng_seed(0)
    pyro.clear_param_store()
    pyro.enable_validation(__debug__)

    # load data
    if args.dataset == "dipper":
        capture_history_file = os.path.dirname(
            os.path.abspath(__file__)) + '/dipper_capture_history.csv'
    elif args.dataset == "vole":
        capture_history_file = os.path.dirname(
            os.path.abspath(__file__)) + '/meadow_voles_capture_history.csv'
    else:
        raise ValueError("Available datasets are 'dipper' and 'vole'.")

    capture_history = torch.tensor(
        np.genfromtxt(capture_history_file, delimiter=',')).float()[:, 1:]
    N, T = capture_history.shape
    print("Loaded {} capture history for {} individuals collected over {} time periods."
          .format(args.dataset, N, T))

    if args.dataset == "dipper" and args.model in ["4", "5"]:
        sex_file = os.path.dirname(
            os.path.abspath(__file__)) + '/dipper_sex.csv'
        sex = torch.tensor(np.genfromtxt(sex_file, delimiter=',')).float()[:, 1]
        print("Loaded dipper sex data.")
    elif args.dataset == "vole" and args.model in ["4", "5"]:
        raise ValueError("Cannot run model_{} on meadow voles data, since we lack sex "
                         "information for these animals.".format(args.model))
    else:
        sex = None

    model = models[args.model]

    # we use poutine.block to only expose the continuous latent variables
    # in the models to AutoDiagonalNormal (all of which begin with 'phi'
    # or 'rho')
    def expose_fn(msg):
        return msg["name"][0:3] in ['phi', 'rho']

    # we use a mean field diagonal normal variational distribution (i.e. guide)
    # for the continuous latent variables.
    guide = AutoDiagonalNormal(poutine.block(model, expose_fn=expose_fn))

    # since we enumerate the discrete random variables,
    # we need to use TraceEnum_ELBO.
    elbo = TraceEnum_ELBO(max_plate_nesting=1, num_particles=20, vectorize_particles=True)
    optim = Adam({'lr': args.learning_rate})
    svi = SVI(model, guide, optim, elbo)

    losses = []
    print("Beginning training of model_{} with Stochastic Variational Inference."
          .format(args.model))

    for step in range(args.num_steps):
        loss = svi.step(capture_history, sex)
        losses.append(loss)
        if (step % 20 == 0 and step > 0) or step == args.num_steps - 1:
            print("[iteration %03d] loss: %.3f" % (step, np.mean(losses[-20:])))

    # evaluate final trained model
    elbo_eval = TraceEnum_ELBO(max_plate_nesting=1, num_particles=2000, vectorize_particles=True)
    svi_eval = SVI(model, guide, optim, elbo_eval)
    print("Final loss: %.4f" % svi_eval.evaluate_loss(capture_history, sex))
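# The expose_fn convention above selects sites purely by name prefix. A minimal,
# self-contained sketch with made-up site names (not taken from the models):
def _expose_by_prefix_demo(msg):
    return msg["name"][0:3] in ['phi', 'rho']

assert _expose_by_prefix_demo({"name": "phi_survival"})    # exposed to the autoguide
assert _expose_by_prefix_demo({"name": "rho_recapture"})   # exposed to the autoguide
assert not _expose_by_prefix_demo({"name": "z_3"})         # stays blocked (enumerated site)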
tau = pyro.param('tau', lambda: MultivariateNormal(torch.zeros(D), torch.eye(D)).sample()) with pyro.plate("prec_plate", D): q_prec = pyro.sample("prec", Gamma(alpha, beta)) # with pyro.plate("corr_chol_plate", 1): q_corr_chol = pyro.sample("corr_chol", LKJCorrCholesky(d=D, eta=psi)) _q_std = torch.sqrt(1. / q_prec.squeeze()) q_sigma_chol = torch.mm(torch.diag(_q_std), q_corr_chol.squeeze()) q_mu = pyro.sample("mu", MultivariateNormal(tau, scale_tril=q_sigma_chol)) optim = Adam({"lr": 0.01}) svi = SVI(model, guide, optim, loss=Trace_ELBO(num_particles=10)) def train(num_iterations): losses = [] pyro.clear_param_store() # fig = plt.figure(figsize=(5, 5)) # plt.scatter(data[:, 0], data[:, 1], color="blue", marker="+") # center, covar = marginal(guide, num_samples=100) # artist = animate(fig.gca(), None, center, covar) for j in tqdm(range(num_iterations)): loss = svi.step(data) losses.append(loss)
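# The guide above assembles a covariance Cholesky factor as
# scale_tril = diag(1/sqrt(precision)) @ corr_chol, recombining per-dimension
# scales with the LKJ-distributed correlation Cholesky. A standalone numeric
# check with illustrative values (D=2, not from the model above):
#
#     prec = torch.tensor([4.0, 1.0])
#     corr_chol = torch.tensor([[1.0, 0.0], [0.3, 0.9539392]])  # rows have unit norm
#     std = torch.sqrt(1.0 / prec)
#     scale_tril = torch.mm(torch.diag(std), corr_chol)
#     cov = scale_tril @ scale_tril.t()   # full covariance implied by the guide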
class GaussianMixtureModel(object):
    def __init__(self, data, number_of_hidden_states=3):
        self.number_of_hidden_states = number_of_hidden_states
        self.data = data
        self.global_guide = AutoDelta(
            poutine.block(self.model, expose=['weights', 'locs', 'scale']))
        self.svi = None
        self.losses = None
        self.gradient_norms = None

    @config_enumerate
    def model(self, data):
        # Global variables.
        weights = pyro.sample(
            'weights',
            dist.Dirichlet(0.5 * torch.ones(self.number_of_hidden_states)))
        with pyro.plate('components', self.number_of_hidden_states):
            locs = pyro.sample('locs', dist.Normal(0., 10.))
            scale = pyro.sample('scale', dist.LogNormal(0., 2.))
        with pyro.plate('data', len(data)):
            # Local variables.
            assignment = pyro.sample('assignment', dist.Categorical(weights))
            pyro.sample('obs', dist.Normal(locs[assignment], scale[assignment]), obs=data)

    def initialize(self, seed):
        pyro.set_rng_seed(seed)
        pyro.clear_param_store()
        pyro.param('auto_weights', 0.5 * torch.ones(self.number_of_hidden_states),
                   constraint=constraints.simplex)
        pyro.param('auto_scale', (self.data.var() / 2).sqrt(),
                   constraint=constraints.positive)
        pyro.param(
            'auto_locs',
            self.data[torch.multinomial(
                torch.ones(len(self.data)) / len(self.data),
                self.number_of_hidden_states)])
        optim = pyro.optim.Adam({'lr': 0.1, 'betas': [0.8, 0.99]})
        elbo = TraceEnum_ELBO(max_plate_nesting=1)
        self.svi = SVI(self.model, self.global_guide, optim, loss=elbo)
        loss = self.svi.loss(self.model, self.global_guide, self.data)
        return loss

    def train(self):
        loss, seed = min((self.initialize(seed), seed) for seed in range(100))
        self.initialize(seed)
        gradient_norms = defaultdict(list)
        for name, value in pyro.get_param_store().named_parameters():
            value.register_hook(
                lambda g, name=name: gradient_norms[name].append(g.norm().item()))
        losses = []
        for i in range(200 if not smoke_test else 2):
            loss = self.svi.step(self.data)
            losses.append(loss)
        self.losses = losses
        self.gradient_norms = gradient_norms

    def show_losses(self):
        assert self.losses is not None, "must train the model before showing losses"
        pyplot.figure(figsize=(10, 3), dpi=100).set_facecolor('white')
        pyplot.plot(self.losses)
        pyplot.xlabel('iters', size=18)
        pyplot.ylabel('loss', size=18)
        #pyplot.yscale('log')
        pyplot.grid()
        pyplot.title('Convergence of stochastic variational inference', size=20)
        pyplot.show()

    def show_gradients_norm(self):
        pyplot.figure(figsize=(10, 4), dpi=100).set_facecolor('white')
        for name, grad_norms in self.gradient_norms.items():
            pyplot.plot(grad_norms, label=name)
        pyplot.xlabel('iters')
        pyplot.ylabel('gradient norm')
        pyplot.yscale('log')
        pyplot.legend(loc='best')
        pyplot.title('Gradient norms during SVI')
        pyplot.show()

    def return_map_estimate(self):
        map_estimates = self.global_guide(self.data)
        weights = map_estimates['weights'].data.numpy()
        locs = map_estimates['locs'].data.numpy()
        scale = map_estimates['scale'].data.numpy()
        return weights, locs, scale
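# Usage sketch for GaussianMixtureModel on synthetic 1-D data (illustrative only;
# `smoke_test` is the module-level flag read by train() and must be defined):
#
#     import torch
#     smoke_test = True
#     data = torch.cat([torch.randn(100) * 0.5 - 4.0,
#                       torch.randn(100) * 0.5 + 4.0])
#     gmm = GaussianMixtureModel(data, number_of_hidden_states=2)
#     gmm.train()
#     weights, locs, scale = gmm.return_map_estimate()
#     gmm.show_losses()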
def per_param_args(module_name, param_name):
    if 'predictor' in module_name:
        return {"lr": 0.001}
    elif '_loc' in param_name:
        return {"lr": 0.005}
    elif '_scale' in param_name:
        return {"lr": 0.005}  # CHANGED
    else:
        return {"lr": 0.005}

svi = SVI(
    model,
    guide,
    #optim.ClippedAdam({"lr": 0.005}),
    optim.ClippedAdam(per_param_args),
    loss=Trace_ELBO(),
    num_samples=1000)

pyro.clear_param_store()

num_epochs = 30000
track_loglik = True
elbo_losses = []
alpha_errors = []
beta_errors = []
betaInd_errors = []
best_elbo = np.inf
patience_thre = 5
patience_count = 0
tic = time.time()
train_loader, test_loader = setup_data_loaders(batch_size=512, subset=True)

# clear param store
pyro.clear_param_store()

# setup the Factor Analysis model
fa = FA()

# define the guide automatically: a mean-field normal approximation
guide = AutoNormal(fa)
# or define the guide manually
# guide = fa.guide

optim = Adam({"lr": LEARNING_RATE})
svi = SVI(fa.forward, guide=guide, optim=optim, loss=Trace_ELBO())

# training loop
train_elbo = []
test_elbo = []
for epoch in range(NUM_EPOCHS):
    total_epoch_loss_train = train(svi, train_loader)
    train_elbo.append(-total_epoch_loss_train)
    # print("[epoch %03d] average training loss: %.4f" % (epoch, total_epoch_loss_train))

    if epoch % TEST_FREQUENCY == 0:
        total_epoch_loss_test = evaluate(svi, test_loader)
        test_elbo.append(-total_epoch_loss_test)
        print("[epoch %03d] average test loss: %.4f" % (epoch, total_epoch_loss_test))
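# After training, the learned mean-field posterior can be inspected directly from
# the AutoNormal guide (a sketch; site names depend on how FA.forward names its
# pyro.sample sites):
#
#     with torch.no_grad():
#         for name, value in guide.median().items():
#             print(name, value.shape)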
train_unlab_loader, train_lab_loader, val_loader, test_loader = ckd_setup_data_loaders(batch_size=3, use_cuda=USE_CUDA)

# read in training data labels (scaled to [-3, 3] instead of [0, 5])
train_lab_labels = np.genfromtxt('../scaled_05/train_data_lab_labels_z.csv', delimiter=',')

# clear param store
pyro.clear_param_store()

# setup the VAE
vae = VAE(use_cuda=USE_CUDA)

# setup the optimizer
adam_args = {"lr": LEARNING_RATE}
optimizer = Adam(adam_args)

# setup the inference algorithm
# Trace_ELBO implements black-box VI: it maximizes the ELBO with noisy Monte Carlo
# gradient estimates instead of computing the integral analytically; the
# reparameterization trick enables backprop through the latent sample (needed
# only on the encoder side).
svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())

train_unlab_elbo = []
train_lab_elbo = []
test_elbo = []
# training loop
for epoch in range(NUM_EPOCHS):
    # add unlab
    total_epoch_loss_train_unlab, total_epoch_loss_train_lab = train(svi, train_unlab_loader, train_lab_loader, train_lab_labels, use_cuda=USE_CUDA)
    train_unlab_elbo.append(-total_epoch_loss_train_unlab)
    train_lab_elbo.append(-total_epoch_loss_train_lab)
WRITE_FREQUENCY = 20

smoke_test = False
if smoke_test:
    pyro.enable_validation(True)
    pyro.distributions.enable_validation(True)
    NUM_EPOCHS = 21
else:
    pyro.enable_validation(False)
    pyro.distributions.enable_validation(False)
    NUM_EPOCHS = 101

# setup the optimizer
optimizer = Adamax({"lr": 1.0e-3, "betas": (0.9, 0.999)})
#optimizer = RMSprop({"lr": 1.0e-4})
svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO(num_particles=1))
train_loss, test_loss = [], []
min_loss = 999999

#write_dir = '/Users/ldalessi/VAE_PYRO/ARCHIVE/'
#write_dir = "/home/jupyter/REPOS/VAE_PYRO/ARCHIVE/"
write_dir = "/home/ldalessi/REPOS/VAE_PYRO/ARCHIVE/"

#descriptor = "Fashion_MNIST_scale_1000.0"
descriptor = "Fashion_TEST"

name_vae = "vae_" + descriptor + "_"
name_train_loss = "train_loss_" + descriptor + "_"
name_test_loss = "test_loss_" + descriptor + "_"
name_params = "params_" + descriptor
class RDPModel(nn.Module):
    """Network for predicting tweet sentiment using variational inference.

    """

    def __init__(self, rels, n_polarities=3):
        """Class constructor.

        Args:
            rels (set): set of relation tuples
            n_polarities (int): number of polarities to predict

        """
        super(RDPModel, self).__init__()
        # initialize mapping from relations to indices
        self._logger = LOGGER
        self._rel2idx = {rel_i: i for i, rel_i in enumerate(rels, 1)}
        self._rel2idx[None] = 0
        self.n_rels = len(self._rel2idx)
        self.n_polarities = n_polarities
        # setup testing variables
        self._test_epochs = TEST_EPOCHS
        self._max_test_instances = 10
        self._test_wbench = np.empty((self._test_epochs,
                                      self._max_test_instances,
                                      self.n_polarities))
        self.softplus = nn.Softplus()
        # initialize internal models
        self.alpha_model = AlphaModel(self.n_rels)
        self._alpha_model_priors = None
        self.alpha_guide = AlphaGuide(self.n_rels)
        self._alpha_guide_prior_params = None
        self._param_store = pyro.get_param_store()
        self._best_params = None
        self._svi = SVI(self.model, self.guide, optim=RMSprop(OPTIM_PARAM),
                        loss=Trace_ELBO())

    @property
    def rel2idx(self):
        return self._rel2idx

    @property
    def best_params(self):
        return self._best_params

    # the model: p(x, z) = p(x|z)p(z)
    def model(self, node_scores, children, rels, labels):
        # since we will modify `node_scores` in-place, we copy it first so that
        # the caller's tensor stays intact
        node_scores = deepcopy(node_scores)
        # process minibatch
        n_instances = node_scores.shape[0]
        max_t = node_scores.shape[1]
        max_children = children.shape[-1]
        alpha_model = pyro.random_module(
            "alpha_model", self.alpha_model, self._get_model_priors()
        )()
        # iterate over each instance of the batch
        with pyro.iarange("batch", size=n_instances) as inst_indices:
            # iterate over each node of the tree in the bottom-up fashion
            for i in range(max_t):
                prnt_scores_i = node_scores[inst_indices, i]
                rels_i = rels[inst_indices, i]
                child_scores_i = self._get_child_scores(
                    node_scores, children, inst_indices, i,
                    n_instances, max_children
                )
                # iterate over each child of that node
                for j in range(max_children):
                    child_scores_ij = child_scores_i[inst_indices, j]
                    var_sfx = "{}_{}".format(i, j)
                    copy_indices, probs2copy, alpha_indices, alpha = \
                        alpha_model(
                            var_sfx, prnt_scores_i, child_scores_ij,
                            rels_i[inst_indices, j]
                        )
                    if probs2copy is not None:
                        node_scores[inst_indices[copy_indices], i] = probs2copy
                    if alpha is not None:
                        z_ij = pyro.sample(
                            "z_{}_{}".format(i, j), dist.Dirichlet(alpha))
                        node_scores[inst_indices[alpha_indices], i] = z_ij
                    prnt_scores_i = node_scores[inst_indices, i]
            z_ij = node_scores[inst_indices, -1]
            y = pyro.sample("y", dist.Categorical(z_ij), obs=labels[inst_indices])
        return y

    # the guide (i.e., variational distribution): q(z|x)
    def guide(self, node_scores, children, rels, labels):
        # since we will modify `node_scores` in-place, we copy it first so that
        # the caller's tensor stays intact
        node_scores = deepcopy(node_scores)
        # process minibatch
        n_instances = node_scores.shape[0]
        max_t = node_scores.shape[1]
        max_children = children.shape[-1]
        priors = self._get_guide_priors()
        self._logger.debug("guide priors: %r", priors)
        alpha_guide = pyro.random_module("alpha_guide", self.alpha_guide, priors)()
        self._logger.debug("alpha_guide.z_epsilon: %r", alpha_guide.z_epsilon)
        self._logger.debug("alpha_guide.M: %r", alpha_guide.M)
        self._logger.debug("alpha_guide.beta: %r", alpha_guide.beta)
        self._logger.debug("alpha_guide.scale_factor: %r", alpha_guide.scale_factor)
        # iterate over each instance of the batch
pyro.iarange("batch", size=n_instances) as inst_indices: # iterate over each node of the tree in the bottom-up fashion for i in range(max_t): self._logger.debug("Considering time step %d", max_t) prnt_scores_i = node_scores[inst_indices, i] self._logger.debug("prnt_scores[%d]: %r", max_t, prnt_scores_i) rels_i = rels[inst_indices, i] self._logger.debug("rels[%d]: %r", max_t, rels_i) child_scores_i = self._get_child_scores( node_scores, children, inst_indices, i, n_instances, max_children ) self._logger.debug("child_scores_i: %r", child_scores_i) self._logger.debug("child_scores[%d]: %r", max_t, rels_i) # iterate over each child of that node for j in range(max_children): self._logger.debug("Considering child %d", j) child_scores_ij = child_scores_i[inst_indices, j] var_sfx = "{}_{}".format(i, j) self._logger.debug("sampling variable %s", var_sfx) self._logger.debug("prnt_scores_i: %r", prnt_scores_i) self._logger.debug("child_scores_ij: %r", child_scores_ij) copy_indices, probs2copy, alpha_indices, alpha = \ alpha_guide( var_sfx, prnt_scores_i, child_scores_ij, rels_i[inst_indices, j] ) self._logger.debug("alpha %r", alpha) self._logger.debug("probs2copy %r", probs2copy) if probs2copy is not None: node_scores[inst_indices[copy_indices], i] = probs2copy if alpha is not None: z_ij = pyro.sample( "z_{}_{}".format(i, j), dist.Dirichlet(alpha)) node_scores[inst_indices[alpha_indices], i] = z_ij prnt_scores_i = node_scores[inst_indices, i] return node_scores[inst_indices, -1] def step(self, data): """Perform a training step on a single epoch. Args: data (torch.utils.data.DataLoader): training dataset Returns: float: loss """ loss = 0. for batch_j in data: loss += self._svi.step(*batch_j) return loss def loss(self, x): """Evaluate the loss function on the given data. Args: x (tuple[tensors]): tensors with input data """ return self._svi.evaluate_loss(*x) def predict(self, x, trg_y): """Predict labels. Args: x (torch.utils.data.DataLoader): trg_y (np.array): array for storing the predicted labels Returns: float: loss """ # resize `self._test_wbench` if necessary n_instances = x[0].shape[0] self._resize_wbench(n_instances) self._test_wbench *= 0 # print("self._test_wbench:", repr(self._test_wbench)) with poutine.block(): with torch.no_grad(): for wbench_i in self._test_wbench: wbench_i[:n_instances] = self.guide(*x) mean = np.mean(self._test_wbench, axis=0) trg_y[:] = np.argmax(mean[:n_instances], axis=-1) return trg_y def debug(self, x, trg_y): """Predict labels. Args: x (torch.utils.data.DataLoader): trg_y (np.array): array for storing the predicted labels Returns: float: loss """ # resize `self._test_wbench` if necessary n_instances = x[0].shape[0] self._resize_wbench(n_instances) self._test_wbench *= 0 # print("self._test_wbench:", repr(self._test_wbench)) with poutine.block(): with torch.no_grad(): for wbench_i in self._test_wbench: wbench_i[:n_instances] = self.guide(*x) break self._logger.debug("self._test_wbench: %r", self._test_wbench) mean = np.mean(self._test_wbench, axis=0) self._logger.debug("self._test_wbench (mean): %r", mean) trg_y[:] = np.argmax(mean[:n_instances], axis=-1) return trg_y def inspect_state(self): """Output current pyro parameters. """ for name in self._param_store.get_all_param_names(): self._logger.info("Param [%s]: %r", name, pyro.param(name).data.numpy()) def remember_state(self): """Remember current pyro parameters. """ self._best_params = deepcopy(self._param_store.get_state()) def set_state(self, params): """Set current pyro parameters. 
""" self._param_store.set_state(self.best_params) def _get_child_scores(self, node_scores, children, inst_indices, i, n_instances, max_children): child_indices = children[inst_indices, i].reshape(-1) inst_indices.repeat(max_children, 1).t().reshape(-1) child_scores = node_scores[ inst_indices.repeat(max_children, 1).t().reshape(-1), child_indices ].reshape(n_instances, max_children, -1) return child_scores def _get_prior_params(self): """Initialize priors which are common for model and guide. Returns: dict[str -> np.array]: dictionary of distribution parameters """ # relation transformation matrix M_mu = np.eye(self.n_polarities, dtype="float32") M_mu[1, :] = [0., 0.3, 0.] M_mu = np.tile(M_mu, (self.n_rels, 1)).reshape( self.n_rels, self.n_polarities, self.n_polarities ) # for rel, rel_idx in iteritems(self.rel2idx): # # swap axes for contrastive relations # if check_rel(rel, CONTRASTIVE_RELS): # mu_i = M_mu[rel_idx] # mu_i[[0, 2]] = mu_i[[2, 0]] M_mu = torch.tensor(M_mu) M_sigma = torch.tensor( np.ones((self.n_rels, self.n_polarities, self.n_polarities), dtype="float32") ) # beta beta_p = 5. * torch.tensor(np.ones((self.n_rels, self.n_polarities), dtype="float32")) beta_q = 5. * torch.tensor(np.ones((self.n_rels, self.n_polarities), dtype="float32")) # z_epsilon z_epsilon_p = torch.tensor(1.) z_epsilon_q = torch.tensor(15.) # scale factor scale_factor = torch.tensor(34.) return {"M_mu": M_mu, "M_sigma": M_sigma, "beta_p": beta_p, "beta_q": beta_q, "z_epsilon_p": z_epsilon_p, "z_epsilon_q": z_epsilon_q, "scale_factor": scale_factor} def _get_model_priors(self): """Initialize priors for alpha model. Returns: dict: dictionary of priors """ if self._alpha_model_priors: return self._alpha_model_priors # sample the variables from their corresponding distributions params = self._get_prior_params() self._alpha_model_priors = self._params2probs(params) return self._alpha_model_priors def _get_guide_priors(self): """Initialize priors for alpha guide. Args: guide_mode (bool): create priors for guide (i.e., wrap relevant parameters into `pyro.param`) Returns: dict: dictionary of priors """ if not self._alpha_guide_prior_params: # create initial parameters params = self._get_prior_params() # register all parameters in pyro for p, v in iteritems(params): pyro.param(p, v) self._alpha_guide_prior_params = dict( self._param_store.named_parameters() ) else: # register all parameters in pyro for p, v in iteritems(self._alpha_guide_prior_params): pyro.param(p, v) return self._params2probs(self._alpha_guide_prior_params) def _params2probs(self, params): """Convert parameters to probability distributions. Args: params (dict[str -> np.array]): dictionary of distribution parameters Returns: dict[str -> Dist]: dictionary of prior probabilities """ M = dist.Normal(params["M_mu"], self.softplus(params["M_sigma"])).independent(2) beta = dist.Beta(self.softplus(params["beta_p"]), self.softplus(params["beta_q"])).independent(1) z_epsilon = dist.Beta( self.softplus(params["z_epsilon_p"]), self.softplus(params["z_epsilon_q"])) scale_factor = dist.Chi2(params["scale_factor"]) return {"M": M, "beta": beta, "z_epsilon": z_epsilon, "scale_factor": scale_factor} def _resize_wbench(self, n_instances): if n_instances > self._max_test_instances: self._max_test_instances = n_instances self._test_wbench = np.resize( self._test_wbench, (self._test_epochs, self._max_test_instances, self.n_polarities) ) def _reset(self): """Remove members which cannot be serialized. 
""" self._logger.info("Parameters before saving.") self.inspect_state() self._alpha_guide_prior_params = None self._param_store = None self._logger = None def _restore(self): """Remove members which cannot be serialized. """ self._logger = LOGGER self._param_store = pyro.get_param_store() self.set_state(self.best_params) self._alpha_guide_prior_params = dict( self._param_store.named_parameters() )
def update_noise_svi( self, observed_steady_state, initial_noise, optimizer: Optional[Type[Optimizer]] = None, lr: float = 0.001, optimizer_kwargs: Optional[Mapping[str, Any]] = None, num_steps: int = 1000, ): observation_model = condition(self.noisy_model, observed_steady_state) pyro.clear_param_store() if optimizer_kwargs is None: optimizer_kwargs = {} if optimizer is None: optimizer = SGD optimizer_kwargs.setdefault('momentum', 0.1) svi = SVI( model=observation_model, guide=self.guide, optim=optimizer({'lr': lr, **optimizer_kwargs}), loss=Trace_ELBO(), ) losses = [] samples = defaultdict(list) for _ in trange(num_steps, desc='Running SVI'): losses.append(svi.step(initial_noise)) for k in initial_noise: mu = f'{k}_mu' sigma = f'{k}_sigma' samples[mu].append(pyro.param(mu).item()) samples[sigma].append(pyro.param(sigma).item()) means = {k: statistics.mean(v) for k, v in samples.items()} # TODO is this a viable replacement? # updated_noise = { # k: (means[f'{k}_mu'], means[f'{k}_sigma']) # for k in initial_noise # } updated_noise = { 'N_SARS_COV2': (means['N_SARS_COV2_mu'], means['N_SARS_COV2_sigma']), 'N_TOCI': (means['N_TOCI_mu'], means['N_TOCI_sigma']), 'N_PRR': (means['N_PRR_mu'], means['N_PRR_sigma']), 'N_ACE2': (means['N_ACE2_mu'], means['N_ACE2_sigma']), 'N_TNF': (means['N_TNF_mu'], means['N_TNF_sigma']), 'N_AngII': (means['N_AngII_mu'], means['N_AngII_sigma']), 'N_AGTR1': (means['N_AGTR1_mu'], means['N_AGTR1_sigma']), 'N_ADAM17': (means['N_ADAM17_mu'], means['N_ADAM17_sigma']), 'N_IL_6Ralpha': (means['N_IL_6Ralpha_mu'], means['N_IL_6Ralpha_sigma']), 'N_sIL_6_alpha': (means['N_sIL_6_alpha_mu'], means['N_sIL_6_alpha_sigma']), 'N_STAT3': (means['N_STAT3_mu'], means['N_STAT3_sigma']), 'N_EGF': (means['N_EGF_mu'], means['N_EGF_sigma']), 'N_EGFR': (means['N_EGFR_mu'], means['N_EGFR_sigma']), 'N_IL6_STAT3': (means['N_IL6_STAT3_mu'], means['N_IL6_STAT3_sigma']), 'N_NF_xB': (means['N_NF_xB_mu'], means['N_NF_xB_sigma']), 'N_IL_6_AMP': (means['N_IL_6_AMP_mu'], means['N_IL_6_AMP_sigma']), 'N_cytokine': (means['N_cytokine_mu'], means['N_cytokine_sigma']) } return updated_noise, losses
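# Re the TODO above: the commented comprehension is equivalent to the explicit
# dictionary as long as `initial_noise` contains exactly those N_* keys, since
# the sampling loop records '{k}_mu' and '{k}_sigma' for every k in initial_noise:
#
#     updated_noise = {
#         k: (means[f'{k}_mu'], means[f'{k}_sigma'])
#         for k in initial_noise
#     }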
def test_elbo_mapdata(batch_size, map_type):
    # normal-normal: known covariance
    lam0 = torch.tensor([0.1, 0.1])  # precision of prior
    loc0 = torch.tensor([0.0, 0.5])  # prior mean
    # known precision of observation noise
    lam = torch.tensor([6.0, 4.0])
    data = []
    sum_data = torch.zeros(2)

    def add_data_point(x, y):
        data.append(torch.tensor([x, y]))
        sum_data.data.add_(data[-1].data)

    add_data_point(0.1, 0.21)
    add_data_point(0.16, 0.11)
    add_data_point(0.06, 0.31)
    add_data_point(-0.01, 0.07)
    add_data_point(0.23, 0.25)
    add_data_point(0.19, 0.18)
    add_data_point(0.09, 0.41)
    add_data_point(-0.04, 0.17)

    data = torch.stack(data)
    n_data = torch.tensor([float(len(data))])
    analytic_lam_n = lam0 + n_data.expand_as(lam) * lam
    analytic_log_sig_n = -0.5 * torch.log(analytic_lam_n)
    analytic_loc_n = sum_data * (lam / analytic_lam_n) + \
        loc0 * (lam0 / analytic_lam_n)
    n_steps = 7000

    logger.debug("DOING ELBO TEST [bs = {}, map_type = {}]".format(batch_size, map_type))
    pyro.clear_param_store()

    def model():
        loc_latent = pyro.sample(
            "loc_latent",
            dist.Normal(loc0, torch.pow(lam0, -0.5)).independent(1))
        if map_type == "irange":
            for i in pyro.irange("aaa", len(data), batch_size):
                pyro.sample("obs_%d" % i,
                            dist.Normal(loc_latent, torch.pow(lam, -0.5)).independent(1),
                            obs=data[i])
        elif map_type == "iarange":
            with pyro.iarange("aaa", len(data), batch_size) as ind:
                pyro.sample("obs",
                            dist.Normal(loc_latent, torch.pow(lam, -0.5)).independent(1),
                            obs=data[ind])
        else:
            for i, x in enumerate(data):
                pyro.sample('obs_%d' % i,
                            dist.Normal(loc_latent, torch.pow(lam, -0.5)).independent(1),
                            obs=x)
        return loc_latent

    def guide():
        loc_q = pyro.param(
            "loc_q",
            torch.tensor(analytic_loc_n.data + torch.tensor([-0.18, 0.23]),
                         requires_grad=True))
        log_sig_q = pyro.param(
            "log_sig_q",
            torch.tensor(analytic_log_sig_n.data - torch.tensor([-0.18, 0.23]),
                         requires_grad=True))
        sig_q = torch.exp(log_sig_q)
        pyro.sample("loc_latent", dist.Normal(loc_q, sig_q).independent(1))
        if map_type == "irange" or map_type is None:
            for i in pyro.irange("aaa", len(data), batch_size):
                pass
        elif map_type == "iarange":
            # dummy iarange to do subsampling for observe
            with pyro.iarange("aaa", len(data), batch_size):
                pass
        else:
            pass

    adam = optim.Adam({"lr": 0.0008, "betas": (0.95, 0.999)})
    svi = SVI(model, guide, adam, loss=TraceGraph_ELBO())

    for k in range(n_steps):
        svi.step()

        loc_error = torch.sum(torch.pow(analytic_loc_n - pyro.param("loc_q"), 2.0))
        log_sig_error = torch.sum(torch.pow(analytic_log_sig_n - pyro.param("log_sig_q"), 2.0))
        if k % 500 == 0:
            logger.debug("errors - {}, {}".format(loc_error, log_sig_error))

    assert_equal(loc_error.item(), 0, prec=0.05)
    assert_equal(log_sig_error.item(), 0, prec=0.06)
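# Note: pyro.irange / pyro.iarange and .independent() used in this test are the
# pre-0.3 spellings; later Pyro releases renamed them to pyro.plate and
# .to_event() respectively, so this test targets an older Pyro API.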
pinned_lookup = torch.nn.Embedding.from_pretrained(torch.FloatTensor(expn.as_matrix().T[1:]), freeze=True)  # [1:] is new!
pinned_lookup.cuda()
torch.manual_seed(3435)

imgs = torch.poisson(pinned_lookup.weight)  # discretize data
# imgs = pinned_lookup.weight.round()
# imgs = pinned_lookup.weight

dat = torch.utils.data.TensorDataset(imgs, torch.zeros(56, 1))  # placeholder arg required pytorch <0.4.0...
loader = torch.utils.data.DataLoader(dat, batch_size=args.batch_size, shuffle=False)
print(next(iter(loader))[0].size())

# setup the VAE
vae = PyroVAE(latent_dim=args.latent_dim)
adam_args = {"lr": args.learning_rate}
optimizer = Adam(adam_args)
svi = SVI(vae.model, vae.guide, optimizer, loss=Trace_ELBO())

# setup visdom for visualization
if args.visdom_flag:
    vis = visdom.Visdom()

train_elbo = []
test_elbo = []
# training loop
for epoch in range(args.num_epochs):
    # initialize loss accumulator
    epoch_loss = 0.
    # do a training epoch over each mini-batch x returned
    # by the data loader
    for x, _ in loader:
        # do ELBO gradient and accumulate loss
        epoch_loss += svi.step(x)
# Prepare training data
from sklearn.preprocessing import MinMaxScaler

df = MinMaxScaler(feature_range=(0, 10)).fit_transform(
    new_data[["DA_DEMD", "DA_LMP", "RT_LMP"]].dropna())
# df = new_data[["DA_DEMD", "DA_LMP", "RT_LMP"]]
train = torch.tensor(df, dtype=torch.float)

###############################################################################
###############################################################################

from pyro.infer import SVI, Trace_ELBO

svi = SVI(model, guide, optim.Adam({"lr": .05}), loss=Trace_ELBO())

DA_DEMD, DA_LMP, RT_LMP = train[:, 0], train[:, 1], train[:, 2]
pyro.clear_param_store()
num_iters = 5000
list_loss = []
for i in range(num_iters):
    elbo = svi.step(DA_DEMD, DA_LMP, RT_LMP)
    list_loss.append(elbo)
    if i % 500 == 0:
        logging.info("Elbo loss: {}".format(elbo))

###############################################################################
###############################################################################
###############################################################################
def show_real_motifs():
    for i in range(nz):
        plt.figure(i)
        # NOTE: the index is fixed at motifs[0, 0], so every figure shows the
        # same motif; if `motifs` stores one motif per leading index, this was
        # likely intended to vary with i.
        real_motif = motifs[0, 0, :, :].cpu().numpy()
        plt.imshow(-real_motif, cmap="gray")
        plt.xticks([])
        plt.yticks([])
        plt.show()

# CHANGE: change adam params
pyro.clear_param_store()
adam_params = {"lr": 0.1}
optimizer = pyro.optim.Adam(adam_params)
svi = SVI(model, guide, optimizer, loss=Trace_ELBO())

n_steps = 20
# data_cuda = data.cpu()
for step in range(n_steps):
    loss = svi.step(data)
    print(loss)

# CHANGE: change only at the end
np.save(file="./mutu_data/qalpha0.npy", arr=pyro.param("qalpha0").detach().cpu().numpy())
np.save(file="./mutu_data/qalpha1.npy", arr=pyro.param("qalpha1").detach().cpu().numpy())

# ADD: quick plot before exhaustive plot
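# Sketch: the saved variational parameters can be reloaded later for plotting
# (paths as written above; illustrative only):
#
#     import numpy as np
#     qalpha0 = np.load("./mutu_data/qalpha0.npy")
#     qalpha1 = np.load("./mutu_data/qalpha1.npy")
#     print(qalpha0.shape, qalpha1.shape)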
    outw_prior = Normal(loc=outw_mu_param, scale=outw_sigma_param).independent(1)
    # Output layer bias distribution priors
    outb_mu = torch.randn_like(net.out.bias)
    outb_sigma = torch.randn_like(net.out.bias)
    outb_mu_param = pyro.param("outb_mu", outb_mu)
    outb_sigma_param = softplus(pyro.param("outb_sigma", outb_sigma))
    outb_prior = Normal(loc=outb_mu_param, scale=outb_sigma_param)

    priors = {'fc1.weight': fc1w_prior, 'fc1.bias': fc1b_prior,
              'out.weight': outw_prior, 'out.bias': outb_prior}

    lifted_module = pyro.random_module("module", net, priors)
    return lifted_module()

optim = Adam({"lr": 0.03})
svi = SVI(model, guide, optim, loss=Trace_ELBO(), num_samples=1000)

def train():
    pyro.clear_param_store()
    for j in range(num_iterations):
        # calculate the loss and take a gradient step
        loss = svi.step(x_data, y_data)
        if j % 100 == 0:
            # normalize by dataset size (`data` was undefined here; use x_data)
            print("[iteration %04d] loss: %.4f" % (j + 1, loss / len(x_data)))

train()

for name, value in pyro.get_param_store().items():
    print(name, pyro.param(name))
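# Sketch of posterior-predictive sampling with the lifted module above: every
# call to guide() returns a fresh network whose weights are drawn from the
# variational posterior, so averaging several stochastic forward passes
# approximates the predictive mean. Assumes the guide takes the same (x, y)
# arguments as the model, as svi.step(x_data, y_data) implies; num_samples is
# illustrative.
num_samples = 100
sampled_nets = [guide(x_data, y_data) for _ in range(num_samples)]
yhats = torch.stack([net(x_data).detach() for net in sampled_nets])
mean_prediction = yhats.mean(dim=0)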